Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ RUN pip install flash-linear-attention -U --no-cache-dir
RUN pip install numpy==2.2 --no-cache-dir

# Install tinker, ray, and other deps
RUN pip install --no-cache-dir tinker==0.14.0 "ray[serve]" transformers peft accelerate -U
RUN pip install --no-cache-dir tinker==0.16.1 "ray[serve]" transformers peft accelerate -U
Comment thread
Yunnglin marked this conversation as resolved.

# Clone and install twinkle, checkout to latest v-tag
RUN git clone https://github.com/modelscope/twinkle.git
Expand Down
10 changes: 3 additions & 7 deletions cookbook/client/server/megatron/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ pkill -f "twinkle.server" 2>/dev/null || true

# 停止 vLLM 进程
print_info "停止已有的 vLLM 进程..."
pkill -f "vllm" 2>/dev/null || true
pkill -if "vLLM" 2>/dev/null || true

# 等待上述进程退出
sleep 2
Expand All @@ -281,9 +281,9 @@ if pgrep -f "twinkle.server" > /dev/null 2>&1; then
print_warning "Twinkle Server 未退出,强制终止..."
pkill -9 -f "twinkle.server" 2>/dev/null || true
fi
if pgrep -f "vllm" > /dev/null 2>&1; then
if pgrep -if "vLLM" > /dev/null 2>&1; then
print_warning "vLLM 进程未退出,强制终止..."
pkill -9 -f "vllm" 2>/dev/null || true
pkill -9if "vLLM" 2>/dev/null || true
fi

print_info "停止已有的 Ray 集群..."
Expand Down Expand Up @@ -385,10 +385,6 @@ print_info "日志输出到: $LOG_FILE"
echo ""

# 启动服务器并实时显示日志
touch "$LOG_FILE" # 预创建文件,避免 tail -f 在文件尚未写入时报错
nohup python -m twinkle.server --config "$SERVER_CONFIG_FILE" > "$LOG_FILE" 2>&1 &
SERVER_PID=$!
print_success "Twinkle Server 已启动 (PID: $SERVER_PID)"

# 实时显示日志
tail -f "$LOG_FILE"
17 changes: 10 additions & 7 deletions src/twinkle/server/model/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,22 +164,21 @@ def _ensure_dpo_metric(self, adapter_name: str, beta: float):

def _tinker_build_output(self, inputs, outputs):
"""Extract logits/logps from model outputs and build per-datum output list."""
logits = outputs.get('logits')
if logits is not None:
logits = self._normalize_tensor_output(logits)
logps = outputs.get('logps', None)
if logps is not None:
logps = self._normalize_tensor_output(logps)
logits = self._normalize_tensor_output(outputs.get('logits'))
logps = self._normalize_tensor_output(outputs.get('logps'))
if logits is None and logps is None:
# non-last PP stage: no outputs produced, collector will discard this
return []
return self._get_forward_output(inputs, logits, logps)

@staticmethod
def _normalize_tensor_output(value):
"""Normalize various output formats (tensor, list of tensors, nested lists, floats) to a single tensor.

Handles:
- None or empty list: returns None
- torch.Tensor: detach and move to cpu
Comment thread
Yunnglin marked this conversation as resolved.
- list of torch.Tensor: cat along dim=0
- nested lists: recursively flatten and cat
- list of floats/int: convert to tensor
"""
if value is None:
Expand All @@ -189,6 +188,10 @@ def _normalize_tensor_output(value):
return value.detach().cpu()

if isinstance(value, list):
if not value: # empty list (e.g. non-last PP stage): treat as missing
return None
if isinstance(value[0], torch.Tensor):
return torch.cat(value, dim=0).detach().cpu()
Comment thread
Yunnglin marked this conversation as resolved.
return torch.as_tensor(value, dtype=torch.float32).detach().cpu()

if isinstance(value, (int, float)):
Expand Down
Loading