modelscope · Yunnglin · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -37,7 +37,7 @@ RUN pip install flash-linear-attention -U --no-cache-dir
 RUN pip install numpy==2.2 --no-cache-dir
 
 # Install tinker, ray, and other deps
-RUN pip install --no-cache-dir tinker==0.14.0 "ray[serve]" transformers peft accelerate -U
+RUN pip install --no-cache-dir tinker==0.16.1 "ray[serve]" transformers peft accelerate -U
 
 # Clone and install twinkle, checkout to latest v-tag
 RUN git clone https://github.com/modelscope/twinkle.git

diff --git a/cookbook/client/server/megatron/run.sh b/cookbook/client/server/megatron/run.sh
@@ -271,7 +271,7 @@ pkill -f "twinkle.server" 2>/dev/null || true
 
 # 停止 vLLM 进程
 print_info "停止已有的 vLLM 进程..."
-pkill -f "vllm" 2>/dev/null || true
+pkill -if "vLLM" 2>/dev/null || true
 
 # 等待上述进程退出
 sleep 2
@@ -281,9 +281,9 @@ if pgrep -f "twinkle.server" > /dev/null 2>&1; then
     print_warning "Twinkle Server 未退出，强制终止..."
     pkill -9 -f "twinkle.server" 2>/dev/null || true
 fi
-if pgrep -f "vllm" > /dev/null 2>&1; then
+if pgrep -if "vLLM" > /dev/null 2>&1; then
     print_warning "vLLM 进程未退出，强制终止..."
-    pkill -9 -f "vllm" 2>/dev/null || true
+    pkill -9 -if "vLLM" 2>/dev/null || true
 fi
 
 print_info "停止已有的 Ray 集群..."
@@ -385,10 +385,6 @@ print_info "日志输出到: $LOG_FILE"
 echo ""
 
-# 启动服务器并实时显示日志
-touch "$LOG_FILE"  # 预创建文件，避免 tail -f 在文件尚未写入时报错
+# Start the server in the background; its output is written to $LOG_FILE (no live tail)
 nohup python -m twinkle.server --config "$SERVER_CONFIG_FILE" > "$LOG_FILE" 2>&1 &
 SERVER_PID=$!
 print_success "Twinkle Server 已启动 (PID: $SERVER_PID)"
-
-# 实时显示日志
-tail -f "$LOG_FILE"
diff --git a/src/twinkle/server/model/backends/common.py b/src/twinkle/server/model/backends/common.py
@@ -164,22 +164,21 @@ def _ensure_dpo_metric(self, adapter_name: str, beta: float):
 
     def _tinker_build_output(self, inputs, outputs):
         """Extract logits/logps from model outputs and build per-datum output list."""
-        logits = outputs.get('logits')
-        if logits is not None:
-            logits = self._normalize_tensor_output(logits)
-        logps = outputs.get('logps', None)
-        if logps is not None:
-            logps = self._normalize_tensor_output(logps)
+        logits = self._normalize_tensor_output(outputs.get('logits'))
+        logps = self._normalize_tensor_output(outputs.get('logps'))
+        if logits is None and logps is None:
+            # non-last PP stage: no outputs produced, collector will discard this
+            return []
         return self._get_forward_output(inputs, logits, logps)
 
     @staticmethod
     def _normalize_tensor_output(value):
         """Normalize various output formats (tensor, list of tensors, nested lists, floats) to a single tensor.
 
         Handles:
+        - None or empty list: returns None
         - torch.Tensor: detach and move to cpu
         - list of torch.Tensor: cat along dim=0
-        - nested lists: recursively flatten and cat
         - list of floats/int: convert to tensor
         """
         if value is None:
@@ -189,6 +188,10 @@ def _normalize_tensor_output(value):
             return value.detach().cpu()
 
         if isinstance(value, list):
+            if not value:  # empty list (e.g. non-last PP stage): treat as missing
+                return None
+            if isinstance(value[0], torch.Tensor):  # move chunks to CPU first, then cat there
+                return torch.cat([t.detach().cpu() for t in value], dim=0)
             return torch.as_tensor(value, dtype=torch.float32).detach().cpu()
 
         if isinstance(value, (int, float)):