Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified tools/server/public/index.html.gz
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,11 @@
label: 'Disable reasoning content parsing',
type: SettingsFieldType.CHECKBOX
},
{
key: SETTINGS_KEYS.EXCLUDE_REASONING_FROM_CONTEXT,
label: 'Exclude reasoning from context',
type: SettingsFieldType.CHECKBOX
},
{
key: SETTINGS_KEYS.SHOW_RAW_OUTPUT_SWITCH,
label: 'Enable raw output toggle',
Expand Down
2 changes: 2 additions & 0 deletions tools/server/webui/src/lib/constants/agentic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ export const AGENTIC_REGEX = {
PARTIAL_MARKER: /<<<[A-Za-z_]*$/,
// Matches reasoning content blocks (including tags)
REASONING_BLOCK: /<<<reasoning_content_start>>>[\s\S]*?<<<reasoning_content_end>>>/g,
// Captures the reasoning text between start/end tags
REASONING_EXTRACT: /<<<reasoning_content_start>>>([\s\S]*?)<<<reasoning_content_end>>>/,
// Matches an opening reasoning tag and any remaining content (unterminated)
REASONING_OPEN: /<<<reasoning_content_start>>>[\s\S]*$/,
// Matches a complete agentic tool call display block (start to end marker)
Expand Down
3 changes: 3 additions & 0 deletions tools/server/webui/src/lib/constants/settings-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean |
theme: ColorMode.SYSTEM,
showThoughtInProgress: false,
disableReasoningParsing: false,
excludeReasoningFromContext: false,
showRawOutputSwitch: false,
keepStatsVisible: false,
showMessageStats: true,
Expand Down Expand Up @@ -106,6 +107,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
showThoughtInProgress: 'Expand thought process by default when generating messages.',
disableReasoningParsing:
'Send reasoning_format=none to prevent server-side extraction of reasoning tokens into separate field',
excludeReasoningFromContext:
'Strip reasoning content from previous messages before sending to the model. When unchecked, reasoning is sent back via the reasoning_content field so the model can see its own chain-of-thought across turns.',
showRawOutputSwitch:
'Show toggle button to display messages as plain text instead of Markdown-formatted content',
keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
Expand Down
1 change: 1 addition & 0 deletions tools/server/webui/src/lib/constants/settings-keys.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ export const SETTINGS_KEYS = {
SHOW_TOOL_CALL_IN_PROGRESS: 'showToolCallInProgress',
// Developer
DISABLE_REASONING_PARSING: 'disableReasoningParsing',
EXCLUDE_REASONING_FROM_CONTEXT: 'excludeReasoningFromContext',
SHOW_RAW_OUTPUT_SWITCH: 'showRawOutputSwitch',
CUSTOM: 'custom'
} as const;
69 changes: 60 additions & 9 deletions tools/server/webui/src/lib/services/chat.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,46 @@ export class ChatService {
*
*/

/**
 * Collects the reasoning text embedded in a message's content.
 *
 * Each span matched by `AGENTIC_REGEX.REASONING_EXTRACT` contributes its first
 * capture group (the text between the reasoning tags, tags excluded). The pieces
 * are concatenated in order of appearance with no separator.
 *
 * @param content - Message content: a plain string, an array of content parts,
 *   or null/undefined. For arrays, only parts whose type is
 *   `ContentPartType.TEXT` and whose `text` is non-empty are scanned.
 * @returns The concatenated reasoning text, or `undefined` when there is no
 *   content, no match, or every match is empty.
 */
private static extractReasoningFromContent(
	content: ApiChatMessageData['content'] | null | undefined
): string | undefined {
	if (!content) return undefined;

	// Private global-flag copy of the shared pattern: matchAll requires `g`, and
	// working on a copy never touches lastIndex on the shared constant.
	const pattern = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source, 'g');

	// One left-to-right scan; no need to slice and reassign the input string.
	const extractFromString = (text: string): string =>
		Array.from(text.matchAll(pattern), (match) => match[1]).join('');

	if (typeof content === 'string') {
		// An empty result is reported as "no reasoning".
		return extractFromString(content) || undefined;
	}

	if (!Array.isArray(content)) return undefined;

	const parts: string[] = [];
	for (const part of content) {
		if (part.type === ContentPartType.TEXT && part.text) {
			const result = extractFromString(part.text);
			if (result) parts.push(result);
		}
	}
	return parts.length > 0 ? parts.join('') : undefined;
}

/**
* Sends a chat completion request to the llama.cpp server.
* Supports both streaming and non-streaming responses with comprehensive parameter configuration.
Expand Down Expand Up @@ -111,7 +151,8 @@ export class ChatService {
custom,
timings_per_token,
// Config options
disableReasoningParsing
disableReasoningParsing,
excludeReasoningFromContext
} = options;

const normalizedMessages: ApiChatMessageData[] = messages
Expand Down Expand Up @@ -159,14 +200,24 @@ export class ChatService {
}

const requestBody: ApiChatCompletionRequest = {
messages: normalizedMessages.map((msg: ApiChatMessageData) => ({
role: msg.role,
// Strip reasoning tags/content from the prompt to avoid polluting KV cache.
// TODO: investigate backend expectations for reasoning tags and add a toggle if needed.
content: ChatService.stripReasoningContent(msg.content),
tool_calls: msg.tool_calls,
tool_call_id: msg.tool_call_id
})),
messages: normalizedMessages.map((msg: ApiChatMessageData) => {
// Always strip internal reasoning/agentic tags from content
const cleanedContent = ChatService.stripReasoningContent(msg.content);
const mapped: ApiChatCompletionRequest['messages'][0] = {
role: msg.role,
content: cleanedContent,
tool_calls: msg.tool_calls,
tool_call_id: msg.tool_call_id
};
// When preserving reasoning, extract it from raw content and send as separate field
if (!excludeReasoningFromContext) {
const reasoning = ChatService.extractReasoningFromContent(msg.content);
if (reasoning) {
mapped.reasoning_content = reasoning;
}
}
return mapped;
}),
stream,
return_progress: stream ? true : undefined,
tools: tools && tools.length > 0 ? tools : undefined
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,12 @@ export const SYNCABLE_PARAMETERS: SyncableParameter[] = [
serverKey: 'alwaysShowAgenticTurns',
type: SyncableParameterType.BOOLEAN,
canSync: true
},
{
key: 'excludeReasoningFromContext',
serverKey: 'excludeReasoningFromContext',
type: SyncableParameterType.BOOLEAN,
canSync: true
}
];

Expand Down
2 changes: 2 additions & 0 deletions tools/server/webui/src/lib/stores/chat.svelte.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1479,6 +1479,8 @@ class ChatStore {

if (currentConfig.disableReasoningParsing) apiOptions.disableReasoningParsing = true;

if (currentConfig.excludeReasoningFromContext) apiOptions.excludeReasoningFromContext = true;

if (hasValue(currentConfig.temperature))
apiOptions.temperature = Number(currentConfig.temperature);

Expand Down
4 changes: 4 additions & 0 deletions tools/server/webui/src/lib/types/api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export interface ApiErrorResponse {
export interface ApiChatMessageData {
role: ChatRole;
content: string | ApiChatMessageContentPart[];
reasoning_content?: string;
tool_calls?: ApiChatCompletionToolCall[];
tool_call_id?: string;
timestamp?: number;
Expand Down Expand Up @@ -201,6 +202,9 @@ export interface ApiChatCompletionRequest {
messages: Array<{
role: ChatRole;
content: string | ApiChatMessageContentPart[];
reasoning_content?: string;
tool_calls?: ApiChatCompletionToolCall[];
tool_call_id?: string;
}>;
stream?: boolean;
model?: string;
Expand Down
2 changes: 2 additions & 0 deletions tools/server/webui/src/lib/types/settings.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ export interface SettingsChatServiceOptions {
systemMessage?: string;
// Disable reasoning parsing (use 'none' instead of 'auto')
disableReasoningParsing?: boolean;
// Strip reasoning content from context before sending
excludeReasoningFromContext?: boolean;
tools?: OpenAIToolDefinition[];
// Generation parameters
temperature?: number;
Expand Down
196 changes: 196 additions & 0 deletions tools/server/webui/tests/unit/reasoning-context.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import { describe, it, expect } from 'vitest';
import { AGENTIC_REGEX, REASONING_TAGS } from '$lib/constants/agentic';
import { ContentPartType } from '$lib/enums';

// Behavioural replica of ChatService.extractReasoningFromContent (private static).
// Returns the concatenated first capture group of every
// AGENTIC_REGEX.REASONING_EXTRACT match, or undefined when nothing is found.
function extractReasoningFromContent(
	content: string | Array<{ type: string; text?: string }> | null | undefined
): string | undefined {
	if (!content) return undefined;

	// Private global-flag copy of the shared pattern: matchAll requires `g`, and
	// working on a copy never touches lastIndex on the shared constant.
	const pattern = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source, 'g');

	// One left-to-right scan; no need to slice and reassign the input string.
	const extractFromString = (text: string): string =>
		Array.from(text.matchAll(pattern), (match) => match[1]).join('');

	if (typeof content === 'string') {
		// An empty result is reported as "no reasoning".
		return extractFromString(content) || undefined;
	}

	if (!Array.isArray(content)) return undefined;

	// Only non-empty text parts are scanned; other part types are skipped.
	const parts: string[] = [];
	for (const part of content) {
		if (part.type === ContentPartType.TEXT && part.text) {
			const result = extractFromString(part.text);
			if (result) parts.push(result);
		}
	}
	return parts.length > 0 ? parts.join('') : undefined;
}

// Test-local stand-in for ChatService.stripReasoningContent (private static).
// Removes reasoning and agentic tool-call markup from a string or from every
// non-empty text part of a content-part array; anything else is returned as-is.
function stripReasoningContent(
	content: string | Array<{ type: string; text?: string }> | null | undefined
): typeof content {
	// Applies the four removal patterns one after another, in a fixed order.
	const scrub = (raw: string): string => {
		let cleaned = raw;
		for (const pattern of [
			AGENTIC_REGEX.REASONING_BLOCK,
			AGENTIC_REGEX.REASONING_OPEN,
			AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK,
			AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN
		]) {
			cleaned = cleaned.replace(pattern, '');
		}
		return cleaned;
	};

	// Falsy content (null, undefined, '') passes straight through.
	if (!content) return content;
	if (typeof content === 'string') return scrub(content);
	if (!Array.isArray(content)) return content;

	return content.map((piece) => {
		const isCleanable = piece.type === ContentPartType.TEXT && Boolean(piece.text);
		if (!isCleanable) return piece;
		// Copy the part so the caller's object is not mutated.
		return { ...piece, text: scrub(piece.text as string) };
	});
}

// Mirrors the per-message mapping in ChatService.sendMessage: content is always
// cleaned, and reasoning_content is attached only when reasoning is not excluded
// and the extracted reasoning is non-empty.
function buildApiMessage(
	content: string,
	excludeReasoningFromContext: boolean
): { role: string; content: string; reasoning_content?: string } {
	const visibleText = stripReasoningContent(content) as string;
	// Extraction is skipped entirely when reasoning is excluded.
	const extracted = excludeReasoningFromContext
		? undefined
		: extractReasoningFromContent(content);

	return {
		role: 'assistant',
		content: visibleText,
		...(extracted ? { reasoning_content: extracted } : {})
	};
}

// Helper: builds stored-message text in the shape the chat store produces while
// streaming: the reasoning wrapped in start/end tags, then the visible content.
function wrapReasoning(reasoning: string, content: string): string {
	const taggedReasoning = REASONING_TAGS.START + reasoning + REASONING_TAGS.END;
	return taggedReasoning + content;
}

// Suite for extractReasoningFromContent: tagged strings, untagged and empty
// input, content-part arrays, multiple blocks, and non-text parts.
describe('reasoning content extraction', () => {
it('extracts reasoning from tagged string content', () => {
const input = wrapReasoning('step 1, step 2', 'The answer is 42.');
const result = extractReasoningFromContent(input);
// Only the text between the tags is returned; the trailing answer is not.
expect(result).toBe('step 1, step 2');
});

it('returns undefined when no reasoning tags present', () => {
expect(extractReasoningFromContent('Just a normal response.')).toBeUndefined();
});

it('returns undefined for null/empty input', () => {
expect(extractReasoningFromContent(null)).toBeUndefined();
expect(extractReasoningFromContent(undefined)).toBeUndefined();
expect(extractReasoningFromContent('')).toBeUndefined();
});

it('extracts reasoning from content part arrays', () => {
const input = [
{
type: ContentPartType.TEXT,
text: wrapReasoning('thinking hard', 'result')
}
];
expect(extractReasoningFromContent(input)).toBe('thinking hard');
});

it('handles multiple reasoning blocks', () => {
const input =
REASONING_TAGS.START +
'block1' +
REASONING_TAGS.END +
'middle' +
REASONING_TAGS.START +
'block2' +
REASONING_TAGS.END +
'end';
// Blocks are joined with no separator; text outside the tags is dropped.
expect(extractReasoningFromContent(input)).toBe('block1block2');
});

it('ignores non-text content parts', () => {
// The part carries tagged text but its type is 'image_url', so it is skipped.
const input = [{ type: 'image_url', text: wrapReasoning('hidden', 'img') }];
expect(extractReasoningFromContent(input)).toBeUndefined();
});
});

// Suite for stripReasoningContent: the tagged reasoning is removed and only the
// visible text remains, for both string content and content-part arrays.
describe('strip reasoning content', () => {
it('removes reasoning tags from string content', () => {
const input = wrapReasoning('internal thoughts', 'visible answer');
expect(stripReasoningContent(input)).toBe('visible answer');
});

it('removes reasoning from content part arrays', () => {
const input = [
{
type: ContentPartType.TEXT,
text: wrapReasoning('thoughts', 'answer')
}
];
// Cast narrows the union return type so the first part's text can be read.
const result = stripReasoningContent(input) as Array<{ type: string; text?: string }>;
expect(result[0].text).toBe('answer');
});
});

// Suite for buildApiMessage: checks the content / reasoning_content split with
// the excludeReasoningFromContext flag both off and on.
describe('API message building with reasoning preservation', () => {
// Shared fixture: one reasoning block followed by the visible answer.
const storedContent = wrapReasoning('Let me think: 2+2=4, basic arithmetic.', 'The answer is 4.');

it('preserves reasoning_content when excludeReasoningFromContext is false', () => {
const msg = buildApiMessage(storedContent, false);
expect(msg.content).toBe('The answer is 4.');
expect(msg.reasoning_content).toBe('Let me think: 2+2=4, basic arithmetic.');
// no internal tags leak into either field
expect(msg.content).not.toContain('<<<');
expect(msg.reasoning_content).not.toContain('<<<');
});

it('strips reasoning_content when excludeReasoningFromContext is true', () => {
const msg = buildApiMessage(storedContent, true);
// Content is cleaned in both modes; only the reasoning field is omitted.
expect(msg.content).toBe('The answer is 4.');
expect(msg.reasoning_content).toBeUndefined();
});

it('handles content with no reasoning in both modes', () => {
const plain = 'No reasoning here.';
const msgPreserve = buildApiMessage(plain, false);
const msgExclude = buildApiMessage(plain, true);
expect(msgPreserve.content).toBe(plain);
expect(msgPreserve.reasoning_content).toBeUndefined();
expect(msgExclude.content).toBe(plain);
expect(msgExclude.reasoning_content).toBeUndefined();
});

it('cleans agentic tool call blocks from content even when preserving reasoning', () => {
// Fixture: a reasoning block plus text, followed by a complete tool-call block.
const input =
wrapReasoning('plan', 'text') +
'\n\n<<<AGENTIC_TOOL_CALL_START>>>\n' +
'<<<TOOL_NAME:bash>>>\n' +
'<<<TOOL_ARGS_START>>>\n{}\n<<<TOOL_ARGS_END>>>\nout\n' +
'<<<AGENTIC_TOOL_CALL_END>>>\n';
const msg = buildApiMessage(input, false);
expect(msg.content).not.toContain('<<<');
expect(msg.reasoning_content).toBe('plan');
});
});
Loading