From d119c342890a9746b02b31f7becf550c982240bd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 18:28:51 +0000 Subject: [PATCH 1/6] Initial plan From e006700285b47ff976a3dd512c15e72fbdfdf940 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 18:36:20 +0000 Subject: [PATCH 2/6] Initial exploration: understanding prompt text chunking requirements Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/workflows/mcp-inspector.lock.yml | 216 +++++++++++------------ 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/.github/workflows/mcp-inspector.lock.yml b/.github/workflows/mcp-inspector.lock.yml index 9d37e15f8f9..e9362919d9a 100644 --- a/.github/workflows/mcp-inspector.lock.yml +++ b/.github/workflows/mcp-inspector.lock.yml @@ -4777,11 +4777,11 @@ jobs: core.setFailed(`Error processing missing-tool reports: ${error}`); }); - post_to_slack_channel: + notion_add_comment: needs: - agent - detection - if: (always()) && (contains(needs.agent.outputs.output_types, 'post_to_slack_channel')) + if: (always()) && (contains(needs.agent.outputs.output_types, 'notion_add_comment')) runs-on: ubuntu-latest permissions: contents: read @@ -4796,27 +4796,25 @@ jobs: run: | find /tmp/gh-aw/safe-jobs/ -type f -print echo "GITHUB_AW_AGENT_OUTPUT=/tmp/gh-aw/safe-jobs/agent_output.json" >> $GITHUB_ENV - - name: Post message to Slack + - name: Add comment to Notion page uses: actions/github-script@v8 env: - SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - SLACK_CHANNEL_ID: ${{ env.GH_AW_SLACK_CHANNEL_ID }} + NOTION_API_TOKEN: ${{ secrets.NOTION_API_TOKEN }} + NOTION_PAGE_ID: ${{ vars.NOTION_PAGE_ID }} with: script: |- const fs = require('fs'); - const slackBotToken = process.env.SLACK_BOT_TOKEN; - const slackChannelId = process.env.SLACK_CHANNEL_ID; + const notionToken = process.env.NOTION_API_TOKEN; + 
const pageId = process.env.NOTION_PAGE_ID; const isStaged = process.env.GITHUB_AW_SAFE_OUTPUTS_STAGED === 'true'; const outputContent = process.env.GITHUB_AW_AGENT_OUTPUT; - // Validate required environment variables - if (!slackBotToken) { - core.setFailed('SLACK_BOT_TOKEN secret is not configured. Please add it to your repository secrets.'); + if (!notionToken) { + core.setFailed('NOTION_API_TOKEN secret is not configured'); return; } - - if (!slackChannelId) { - core.setFailed('GH_AW_SLACK_CHANNEL_ID environment variable is required'); + if (!pageId) { + core.setFailed('NOTION_PAGE_ID variable is not set'); return; } @@ -4840,91 +4838,79 @@ jobs: return; } - // Filter for post_to_slack_channel items - const slackMessageItems = agentOutputData.items.filter(item => item.type === 'post_to_slack_channel'); + // Filter for notion_add_comment items + const notionCommentItems = agentOutputData.items.filter(item => item.type === 'notion_add_comment'); - if (slackMessageItems.length === 0) { - core.info('No post_to_slack_channel items found in agent output'); + if (notionCommentItems.length === 0) { + core.info('No notion_add_comment items found in agent output'); return; } - core.info(`Found ${slackMessageItems.length} post_to_slack_channel item(s)`); - - // Process each message item - for (let i = 0; i < slackMessageItems.length; i++) { - const item = slackMessageItems[i]; - const message = item.message; + core.info(`Found ${notionCommentItems.length} notion_add_comment item(s)`); - if (!message) { - core.warning(`Item ${i + 1}: Missing message field, skipping`); - continue; - } + // Process each comment item + for (let i = 0; i < notionCommentItems.length; i++) { + const item = notionCommentItems[i]; + const comment = item.comment; - // Validate message length (max 200 characters) - const maxLength = 200; - if (message.length > maxLength) { - core.warning(`Item ${i + 1}: Message length (${message.length} characters) exceeds maximum allowed length of ${maxLength} 
characters, skipping`); + if (!comment) { + core.warning(`Item ${i + 1}: Missing comment field, skipping`); continue; } if (isStaged) { - let summaryContent = "## 🎭 Staged Mode: Slack Message Preview\n\n"; - summaryContent += "The following message would be posted to Slack if staged mode was disabled:\n\n"; - summaryContent += `**Channel ID:** ${slackChannelId}\n\n`; - summaryContent += `**Message:** ${message}\n\n`; - summaryContent += `**Message Length:** ${message.length} characters\n\n`; + let summaryContent = "## 🎭 Staged Mode: Notion Comment Preview\n\n"; + summaryContent += "The following comment would be added to Notion if staged mode was disabled:\n\n"; + summaryContent += `**Page ID:** ${pageId}\n\n`; + summaryContent += `**Comment:**\n${comment}\n\n`; await core.summary.addRaw(summaryContent).write(); - core.info("📝 Slack message preview written to step summary"); + core.info("📝 Notion comment preview written to step summary"); continue; } - core.info(`Posting message ${i + 1}/${slackMessageItems.length} to Slack channel: ${slackChannelId}`); - core.info(`Message length: ${message.length} characters`); + core.info(`Adding comment ${i + 1}/${notionCommentItems.length} to Notion page: ${pageId}`); try { - const response = await fetch('https://slack.com/api/chat.postMessage', { + const response = await fetch('https://api.notion.com/v1/comments', { method: 'POST', headers: { - 'Content-Type': 'application/json; charset=utf-8', - 'Authorization': `Bearer ${slackBotToken}` + 'Authorization': `Bearer ${notionToken}`, + 'Notion-Version': '2022-06-28', + 'Content-Type': 'application/json' }, body: JSON.stringify({ - channel: slackChannelId, - text: message + parent: { + page_id: pageId + }, + rich_text: [{ + type: 'text', + text: { + content: comment + } + }] }) }); - const data = await response.json(); - if (!response.ok) { - core.setFailed(`Slack API HTTP error (${response.status}): ${response.statusText}`); - return; - } - - if (!data.ok) { - 
core.setFailed(`Slack API error: ${data.error || 'Unknown error'}`); - if (data.error === 'invalid_auth') { - core.error('Authentication failed. Please verify your SLACK_BOT_TOKEN is correct.'); - } else if (data.error === 'channel_not_found') { - core.error('Channel not found. Please verify the GH_AW_SLACK_CHANNEL_ID environment variable is correct and the bot has access to it.'); - } + const errorData = await response.text(); + core.setFailed(`Notion API error (${response.status}): ${errorData}`); return; } - core.info(`✅ Message ${i + 1} posted successfully to Slack`); - core.info(`Message timestamp: ${data.ts}`); - core.info(`Channel: ${data.channel}`); + const data = await response.json(); + core.info(`✅ Comment ${i + 1} added successfully`); + core.info(`Comment ID: ${data.id}`); } catch (error) { - core.setFailed(`Failed to post message ${i + 1} to Slack: ${error instanceof Error ? error.message : String(error)}`); + core.setFailed(`Failed to add comment ${i + 1}: ${error instanceof Error ? 
error.message : String(error)}`); return; } } - notion_add_comment: + post_to_slack_channel: needs: - agent - detection - if: (always()) && (contains(needs.agent.outputs.output_types, 'notion_add_comment')) + if: (always()) && (contains(needs.agent.outputs.output_types, 'post_to_slack_channel')) runs-on: ubuntu-latest permissions: contents: read @@ -4939,25 +4925,27 @@ jobs: run: | find /tmp/gh-aw/safe-jobs/ -type f -print echo "GITHUB_AW_AGENT_OUTPUT=/tmp/gh-aw/safe-jobs/agent_output.json" >> $GITHUB_ENV - - name: Add comment to Notion page + - name: Post message to Slack uses: actions/github-script@v8 env: - NOTION_API_TOKEN: ${{ secrets.NOTION_API_TOKEN }} - NOTION_PAGE_ID: ${{ vars.NOTION_PAGE_ID }} + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + SLACK_CHANNEL_ID: ${{ env.GH_AW_SLACK_CHANNEL_ID }} with: script: |- const fs = require('fs'); - const notionToken = process.env.NOTION_API_TOKEN; - const pageId = process.env.NOTION_PAGE_ID; + const slackBotToken = process.env.SLACK_BOT_TOKEN; + const slackChannelId = process.env.SLACK_CHANNEL_ID; const isStaged = process.env.GITHUB_AW_SAFE_OUTPUTS_STAGED === 'true'; const outputContent = process.env.GITHUB_AW_AGENT_OUTPUT; - if (!notionToken) { - core.setFailed('NOTION_API_TOKEN secret is not configured'); + // Validate required environment variables + if (!slackBotToken) { + core.setFailed('SLACK_BOT_TOKEN secret is not configured. 
Please add it to your repository secrets.'); return; } - if (!pageId) { - core.setFailed('NOTION_PAGE_ID variable is not set'); + + if (!slackChannelId) { + core.setFailed('GH_AW_SLACK_CHANNEL_ID environment variable is required'); return; } @@ -4981,70 +4969,82 @@ jobs: return; } - // Filter for notion_add_comment items - const notionCommentItems = agentOutputData.items.filter(item => item.type === 'notion_add_comment'); + // Filter for post_to_slack_channel items + const slackMessageItems = agentOutputData.items.filter(item => item.type === 'post_to_slack_channel'); - if (notionCommentItems.length === 0) { - core.info('No notion_add_comment items found in agent output'); + if (slackMessageItems.length === 0) { + core.info('No post_to_slack_channel items found in agent output'); return; } - core.info(`Found ${notionCommentItems.length} notion_add_comment item(s)`); + core.info(`Found ${slackMessageItems.length} post_to_slack_channel item(s)`); - // Process each comment item - for (let i = 0; i < notionCommentItems.length; i++) { - const item = notionCommentItems[i]; - const comment = item.comment; + // Process each message item + for (let i = 0; i < slackMessageItems.length; i++) { + const item = slackMessageItems[i]; + const message = item.message; - if (!comment) { - core.warning(`Item ${i + 1}: Missing comment field, skipping`); + if (!message) { + core.warning(`Item ${i + 1}: Missing message field, skipping`); + continue; + } + + // Validate message length (max 200 characters) + const maxLength = 200; + if (message.length > maxLength) { + core.warning(`Item ${i + 1}: Message length (${message.length} characters) exceeds maximum allowed length of ${maxLength} characters, skipping`); continue; } if (isStaged) { - let summaryContent = "## 🎭 Staged Mode: Notion Comment Preview\n\n"; - summaryContent += "The following comment would be added to Notion if staged mode was disabled:\n\n"; - summaryContent += `**Page ID:** ${pageId}\n\n`; - summaryContent += 
`**Comment:**\n${comment}\n\n`; + let summaryContent = "## 🎭 Staged Mode: Slack Message Preview\n\n"; + summaryContent += "The following message would be posted to Slack if staged mode was disabled:\n\n"; + summaryContent += `**Channel ID:** ${slackChannelId}\n\n`; + summaryContent += `**Message:** ${message}\n\n`; + summaryContent += `**Message Length:** ${message.length} characters\n\n`; await core.summary.addRaw(summaryContent).write(); - core.info("📝 Notion comment preview written to step summary"); + core.info("📝 Slack message preview written to step summary"); continue; } - core.info(`Adding comment ${i + 1}/${notionCommentItems.length} to Notion page: ${pageId}`); + core.info(`Posting message ${i + 1}/${slackMessageItems.length} to Slack channel: ${slackChannelId}`); + core.info(`Message length: ${message.length} characters`); try { - const response = await fetch('https://api.notion.com/v1/comments', { + const response = await fetch('https://slack.com/api/chat.postMessage', { method: 'POST', headers: { - 'Authorization': `Bearer ${notionToken}`, - 'Notion-Version': '2022-06-28', - 'Content-Type': 'application/json' + 'Content-Type': 'application/json; charset=utf-8', + 'Authorization': `Bearer ${slackBotToken}` }, body: JSON.stringify({ - parent: { - page_id: pageId - }, - rich_text: [{ - type: 'text', - text: { - content: comment - } - }] + channel: slackChannelId, + text: message }) }); + const data = await response.json(); + if (!response.ok) { - const errorData = await response.text(); - core.setFailed(`Notion API error (${response.status}): ${errorData}`); + core.setFailed(`Slack API HTTP error (${response.status}): ${response.statusText}`); return; } - const data = await response.json(); - core.info(`✅ Comment ${i + 1} added successfully`); - core.info(`Comment ID: ${data.id}`); + if (!data.ok) { + core.setFailed(`Slack API error: ${data.error || 'Unknown error'}`); + if (data.error === 'invalid_auth') { + core.error('Authentication failed. 
Please verify your SLACK_BOT_TOKEN is correct.'); + } else if (data.error === 'channel_not_found') { + core.error('Channel not found. Please verify the GH_AW_SLACK_CHANNEL_ID environment variable is correct and the bot has access to it.'); + } + return; + } + + core.info(`✅ Message ${i + 1} posted successfully to Slack`); + core.info(`Message timestamp: ${data.ts}`); + core.info(`Channel: ${data.channel}`); } catch (error) { - core.setFailed(`Failed to add comment ${i + 1}: ${error instanceof Error ? error.message : String(error)}`); + core.setFailed(`Failed to post message ${i + 1} to Slack: ${error instanceof Error ? error.message : String(error)}`); return; } } From 3c2583a7d359034c186aaec0047d662e2abed7ec Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 18:41:50 +0000 Subject: [PATCH 3/6] Implement chunking in WritePromptTextToYAML function Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/sh.go | 68 +++++++++- pkg/workflow/sh_test.go | 287 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 350 insertions(+), 5 deletions(-) create mode 100644 pkg/workflow/sh_test.go diff --git a/pkg/workflow/sh.go b/pkg/workflow/sh.go index bd50f5cac63..14de69c13c9 100644 --- a/pkg/workflow/sh.go +++ b/pkg/workflow/sh.go @@ -59,12 +59,70 @@ func WriteShellScriptToYAML(yaml *strings.Builder, script string, indent string) } } -// WritePromptTextToYAML writes prompt text to a YAML heredoc with proper indentation +// WritePromptTextToYAML writes prompt text to a YAML heredoc with proper indentation. +// It chunks the text into groups of lines of less than 20000 characters, with a maximum of 5 chunks. +// Each chunk is written as a separate heredoc to avoid GitHub Actions step size limits (21KB). 
func WritePromptTextToYAML(yaml *strings.Builder, text string, indent string) { - yaml.WriteString(indent + "cat >> $GITHUB_AW_PROMPT << 'EOF'\n") + const maxChunkSize = 20000 // 20KB limit for each chunk + const maxChunks = 5 // Maximum number of chunks + textLines := strings.Split(text, "\n") - for _, line := range textLines { - fmt.Fprintf(yaml, "%s%s\n", indent, line) + chunks := chunkLines(textLines, indent, maxChunkSize, maxChunks) + + // Write each chunk as a separate heredoc + for _, chunk := range chunks { + yaml.WriteString(indent + "cat >> $GITHUB_AW_PROMPT << 'EOF'\n") + for _, line := range chunk { + fmt.Fprintf(yaml, "%s%s\n", indent, line) + } + yaml.WriteString(indent + "EOF\n") } - yaml.WriteString(indent + "EOF\n") +} + +// chunkLines splits lines into chunks where each chunk's total size (counting indent and newline per line) does not exceed maxSize; a single line longer than maxSize still gets a chunk of its own. +// Returns at most maxChunks chunks. No lines are dropped: once the chunk limit is reached, all remaining lines are appended to the last chunk, which may therefore exceed maxSize. +func chunkLines(lines []string, indent string, maxSize int, maxChunks int) [][]string { + if len(lines) == 0 { + return [][]string{{}} + } + + var chunks [][]string + var currentChunk []string + currentSize := 0 + + for _, line := range lines { + // Calculate size including indent and newline + lineSize := len(indent) + len(line) + 1 + + // If adding this line would exceed the limit, start a new chunk + if currentSize+lineSize > maxSize && len(currentChunk) > 0 { + // Check if we've reached the maximum number of chunks + if len(chunks) >= maxChunks-1 { + // We're at the last allowed chunk, so add remaining lines to current chunk + currentChunk = append(currentChunk, line) + currentSize += lineSize + continue + } + + // Start a new chunk + chunks = append(chunks, currentChunk) + currentChunk = []string{line} + currentSize = lineSize + } else { + currentChunk = append(currentChunk, line) + currentSize += lineSize + } + } + + // Add the last chunk if there's content + if len(currentChunk) > 0 { + chunks = append(chunks, 
currentChunk) + } + + // If we still have no chunks, return an empty chunk + if len(chunks) == 0 { + return [][]string{{}} + } + + return chunks } diff --git a/pkg/workflow/sh_test.go b/pkg/workflow/sh_test.go new file mode 100644 index 00000000000..73bec34a15f --- /dev/null +++ b/pkg/workflow/sh_test.go @@ -0,0 +1,287 @@ +package workflow + +import ( + "strings" + "testing" +) + +func TestWritePromptTextToYAML_SmallText(t *testing.T) { + var yaml strings.Builder + text := "This is a small text\nWith a few lines\nThat doesn't need chunking" + indent := " " + + WritePromptTextToYAML(&yaml, text, indent) + + result := yaml.String() + + // Should have exactly one heredoc block + if strings.Count(result, "cat >> $GITHUB_AW_PROMPT << 'EOF'") != 1 { + t.Errorf("Expected 1 heredoc block for small text, got %d", strings.Count(result, "cat >> $GITHUB_AW_PROMPT << 'EOF'")) + } + + // Should contain all original lines + if !strings.Contains(result, "This is a small text") { + t.Error("Expected to find original text in output") + } + if !strings.Contains(result, "With a few lines") { + t.Error("Expected to find original text in output") + } + if !strings.Contains(result, "That doesn't need chunking") { + t.Error("Expected to find original text in output") + } + + // Should have proper EOF markers + if strings.Count(result, indent+"EOF") != 1 { + t.Errorf("Expected 1 EOF marker, got %d", strings.Count(result, indent+"EOF")) + } +} + +func TestWritePromptTextToYAML_LargeText(t *testing.T) { + var yaml strings.Builder + // Create text that exceeds 20000 characters + longLine := strings.Repeat("This is a very long line of content that will be repeated many times to exceed the character limit. 
", 10) + lines := make([]string, 50) + for i := range lines { + lines[i] = longLine + } + text := strings.Join(lines, "\n") + indent := " " + + // Calculate expected size + totalSize := 0 + for _, line := range lines { + totalSize += len(indent) + len(line) + 1 + } + + // This should create multiple chunks since each line is ~1000 chars and we have 50 lines + WritePromptTextToYAML(&yaml, text, indent) + + result := yaml.String() + + // Should have multiple heredoc blocks + heredocCount := strings.Count(result, "cat >> $GITHUB_AW_PROMPT << 'EOF'") + if heredocCount < 2 { + t.Errorf("Expected at least 2 heredoc blocks for large text (total size ~%d bytes), got %d", totalSize, heredocCount) + } + + // Should not exceed 5 chunks (max limit) + if heredocCount > 5 { + t.Errorf("Expected at most 5 heredoc blocks, got %d", heredocCount) + } + + // Should have matching EOF markers + eofCount := strings.Count(result, indent+"EOF") + if eofCount != heredocCount { + t.Errorf("Expected %d EOF markers to match %d heredoc blocks, got %d", heredocCount, heredocCount, eofCount) + } + + // Should contain original content (or at least the beginning if truncated) + firstLine := strings.Split(text, "\n")[0] + if !strings.Contains(result, firstLine[:50]) { + t.Error("Expected to find beginning of original text in output") + } +} + +func TestWritePromptTextToYAML_ExactChunkBoundary(t *testing.T) { + var yaml strings.Builder + indent := " " + + // Create text that's exactly at the 20000 character boundary + // Each line: indent (10) + line (100) + newline (1) = 111 bytes + // 180 lines = 19,980 bytes (just under 20000) + line := strings.Repeat("x", 100) + lines := make([]string, 180) + for i := range lines { + lines[i] = line + } + text := strings.Join(lines, "\n") + + WritePromptTextToYAML(&yaml, text, indent) + + result := yaml.String() + + // Should have exactly 1 heredoc block since we're just under the limit + heredocCount := strings.Count(result, "cat >> $GITHUB_AW_PROMPT << 'EOF'") 
+ if heredocCount != 1 { + t.Errorf("Expected 1 heredoc block for text just under limit, got %d", heredocCount) + } +} + +func TestWritePromptTextToYAML_MaxChunksLimit(t *testing.T) { + var yaml strings.Builder + indent := " " + + // Create text that would need more than 5 chunks (if we allowed it) + // Each line: indent (10) + line (1000) + newline (1) = 1011 bytes + // 600 lines = ~606,600 bytes + // At 20000 bytes per chunk, this would need ~31 chunks, but we limit to 5 + line := strings.Repeat("y", 1000) + lines := make([]string, 600) + for i := range lines { + lines[i] = line + } + text := strings.Join(lines, "\n") + + WritePromptTextToYAML(&yaml, text, indent) + + result := yaml.String() + + // Should have exactly 5 heredoc blocks (the maximum) + heredocCount := strings.Count(result, "cat >> $GITHUB_AW_PROMPT << 'EOF'") + if heredocCount != 5 { + t.Errorf("Expected exactly 5 heredoc blocks (max limit), got %d", heredocCount) + } + + // Should have matching EOF markers + eofCount := strings.Count(result, indent+"EOF") + if eofCount != 5 { + t.Errorf("Expected 5 EOF markers, got %d", eofCount) + } +} + +func TestWritePromptTextToYAML_EmptyText(t *testing.T) { + var yaml strings.Builder + text := "" + indent := " " + + WritePromptTextToYAML(&yaml, text, indent) + + result := yaml.String() + + // Should have at least one heredoc block (even for empty text) + if strings.Count(result, "cat >> $GITHUB_AW_PROMPT << 'EOF'") < 1 { + t.Error("Expected at least 1 heredoc block even for empty text") + } + + // Should have matching EOF markers + if strings.Count(result, indent+"EOF") < 1 { + t.Error("Expected at least 1 EOF marker") + } +} + +func TestChunkLines_SmallInput(t *testing.T) { + lines := []string{"line1", "line2", "line3"} + indent := " " + maxSize := 20000 + maxChunks := 5 + + chunks := chunkLines(lines, indent, maxSize, maxChunks) + + if len(chunks) != 1 { + t.Errorf("Expected 1 chunk for small input, got %d", len(chunks)) + } + + if len(chunks[0]) != 3 { + 
t.Errorf("Expected chunk to contain 3 lines, got %d", len(chunks[0])) + } +} + +func TestChunkLines_ExceedsSize(t *testing.T) { + // Create lines that will exceed maxSize + line := strings.Repeat("x", 1000) + lines := make([]string, 50) + for i := range lines { + lines[i] = line + } + + indent := " " + maxSize := 20000 + maxChunks := 5 + + chunks := chunkLines(lines, indent, maxSize, maxChunks) + + // Should have multiple chunks + if len(chunks) < 2 { + t.Errorf("Expected at least 2 chunks, got %d", len(chunks)) + } + + // Verify each chunk (except possibly the last) stays within size limit + for i, chunk := range chunks { + size := 0 + for _, line := range chunk { + size += len(indent) + len(line) + 1 + } + + // Last chunk might exceed if we hit maxChunks limit + if i < len(chunks)-1 && size > maxSize { + t.Errorf("Chunk %d exceeds size limit: %d > %d", i, size, maxSize) + } + } + + // Verify total lines are preserved + totalLines := 0 + for _, chunk := range chunks { + totalLines += len(chunk) + } + if totalLines != len(lines) { + t.Errorf("Expected %d total lines, got %d", len(lines), totalLines) + } +} + +func TestChunkLines_MaxChunksEnforced(t *testing.T) { + // Create many lines that would need more than maxChunks + line := strings.Repeat("x", 1000) + lines := make([]string, 600) + for i := range lines { + lines[i] = line + } + + indent := " " + maxSize := 20000 + maxChunks := 5 + + chunks := chunkLines(lines, indent, maxSize, maxChunks) + + // Should have exactly maxChunks + if len(chunks) != maxChunks { + t.Errorf("Expected exactly %d chunks (max limit), got %d", maxChunks, len(chunks)) + } + + // Verify all lines are included (even if last chunk is large) + totalLines := 0 + for _, chunk := range chunks { + totalLines += len(chunk) + } + if totalLines != len(lines) { + t.Errorf("Expected %d total lines, got %d", len(lines), totalLines) + } +} + +func TestChunkLines_EmptyInput(t *testing.T) { + lines := []string{} + indent := " " + maxSize := 20000 + 
maxChunks := 5 + + chunks := chunkLines(lines, indent, maxSize, maxChunks) + + // Should return at least one empty chunk + if len(chunks) != 1 { + t.Errorf("Expected 1 chunk for empty input, got %d", len(chunks)) + } + + if len(chunks[0]) != 0 { + t.Errorf("Expected empty chunk, got %d lines", len(chunks[0])) + } +} + +func TestChunkLines_SingleLineExceedsLimit(t *testing.T) { + // Single line that exceeds maxSize + line := strings.Repeat("x", 25000) + lines := []string{line} + + indent := " " + maxSize := 20000 + maxChunks := 5 + + chunks := chunkLines(lines, indent, maxSize, maxChunks) + + // Should still have one chunk with that single line + if len(chunks) != 1 { + t.Errorf("Expected 1 chunk, got %d", len(chunks)) + } + + if len(chunks[0]) != 1 { + t.Errorf("Expected 1 line in chunk, got %d", len(chunks[0])) + } +} From 23667bbb135d68c5c396fecf6705761c15252e71 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 18:47:38 +0000 Subject: [PATCH 4/6] Add integration tests for prompt text chunking Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/sh_integration_test.go | 154 ++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 pkg/workflow/sh_integration_test.go diff --git a/pkg/workflow/sh_integration_test.go b/pkg/workflow/sh_integration_test.go new file mode 100644 index 00000000000..aa62bf3dc72 --- /dev/null +++ b/pkg/workflow/sh_integration_test.go @@ -0,0 +1,154 @@ +package workflow + +import ( + "strings" + "testing" +) + +// TestWritePromptTextToYAML_IntegrationWithCompiler verifies that WritePromptTextToYAML +// correctly handles large prompt text that would be used in actual workflow compilation. +// This test simulates what would happen if an embedded prompt file was very large. 
+func TestWritePromptTextToYAML_IntegrationWithCompiler(t *testing.T) { + // Create a realistic scenario: a very long help text or documentation + // that might be included as prompt instructions + section := strings.Repeat("This is an important instruction line that provides guidance to the AI agent on how to perform its task correctly. ", 10) + + // Create 200 lines to ensure we exceed 20KB + lines := make([]string, 200) + for i := range lines { + lines[i] = section + } + largePromptText := strings.Join(lines, "\n") + + // Calculate total size + totalSize := len(largePromptText) + if totalSize < 20000 { + t.Fatalf("Test setup error: prompt text should be at least 20000 bytes, got %d", totalSize) + } + + var yaml strings.Builder + indent := " " // Standard indent used in workflow generation + + // Call the function as it would be called in real compilation + WritePromptTextToYAML(&yaml, largePromptText, indent) + + result := yaml.String() + + // Verify multiple heredoc blocks were created + heredocCount := strings.Count(result, "cat >> $GITHUB_AW_PROMPT << 'EOF'") + if heredocCount < 2 { + t.Errorf("Expected multiple heredoc blocks for large text (%d bytes), got %d", totalSize, heredocCount) + } + + // Verify we didn't exceed 5 chunks + if heredocCount > 5 { + t.Errorf("Expected at most 5 heredoc blocks (max limit), got %d", heredocCount) + } + + // Verify each heredoc is closed + eofCount := strings.Count(result, indent+"EOF") + if eofCount != heredocCount { + t.Errorf("Expected %d EOF markers to match %d heredoc blocks, got %d", heredocCount, heredocCount, eofCount) + } + + // Verify the content is preserved (check first and last sections) + firstSection := section[:100] + lastSection := section[len(section)-100:] + if !strings.Contains(result, firstSection) { + t.Error("Expected to find beginning of original text in output") + } + if !strings.Contains(result, lastSection) { + t.Error("Expected to find end of original text in output") + } + + // Verify the YAML 
structure is valid (basic check) + if !strings.Contains(result, "cat >> $GITHUB_AW_PROMPT << 'EOF'") { + t.Error("Expected proper heredoc syntax in output") + } + + t.Logf("Successfully chunked %d bytes into %d heredoc blocks", totalSize, heredocCount) +} + +// TestWritePromptTextToYAML_RealWorldSizeSimulation simulates various real-world scenarios +// to ensure chunking works correctly across different text sizes. +func TestWritePromptTextToYAML_RealWorldSizeSimulation(t *testing.T) { + tests := []struct { + name string + textSize int // approximate size in bytes + linesCount int // number of lines + expectedChunks int // expected number of chunks + maxChunks int // should not exceed this + }{ + { + name: "small prompt (< 1KB)", + textSize: 500, + linesCount: 10, + expectedChunks: 1, + maxChunks: 1, + }, + { + name: "medium prompt (~10KB)", + textSize: 10000, + linesCount: 100, + expectedChunks: 1, + maxChunks: 1, + }, + { + name: "large prompt (~25KB)", + textSize: 25000, + linesCount: 250, + expectedChunks: 2, + maxChunks: 2, + }, + { + name: "very large prompt (~50KB)", + textSize: 50000, + linesCount: 500, + expectedChunks: 3, + maxChunks: 3, + }, + { + name: "extremely large prompt (~120KB)", + textSize: 120000, + linesCount: 1200, + expectedChunks: 5, + maxChunks: 5, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create text of approximately the desired size + lineSize := tt.textSize / tt.linesCount + line := strings.Repeat("x", lineSize) + lines := make([]string, tt.linesCount) + for i := range lines { + lines[i] = line + } + text := strings.Join(lines, "\n") + + var yaml strings.Builder + indent := " " + + WritePromptTextToYAML(&yaml, text, indent) + + result := yaml.String() + heredocCount := strings.Count(result, "cat >> $GITHUB_AW_PROMPT << 'EOF'") + + if heredocCount < tt.expectedChunks { + t.Errorf("Expected at least %d chunks for %s, got %d", tt.expectedChunks, tt.name, heredocCount) + } + + if heredocCount > 
tt.maxChunks { + t.Errorf("Expected at most %d chunks for %s, got %d", tt.maxChunks, tt.name, heredocCount) + } + + eofCount := strings.Count(result, indent+"EOF") + if eofCount != heredocCount { + t.Errorf("EOF count (%d) doesn't match heredoc count (%d) for %s", eofCount, heredocCount, tt.name) + } + + t.Logf("%s: %d bytes chunked into %d blocks", tt.name, len(text), heredocCount) + }) + } +} From cde384efe6a9379c31115d5a57097d532118c9e4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 18:53:44 +0000 Subject: [PATCH 5/6] Improve test accuracy for size calculation Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/sh_integration_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/workflow/sh_integration_test.go b/pkg/workflow/sh_integration_test.go index aa62bf3dc72..5f5a708fa20 100644 --- a/pkg/workflow/sh_integration_test.go +++ b/pkg/workflow/sh_integration_test.go @@ -119,7 +119,11 @@ func TestWritePromptTextToYAML_RealWorldSizeSimulation(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Create text of approximately the desired size - lineSize := tt.textSize / tt.linesCount + // Account for newlines: total size = linesCount * (lineSize + 1) - 1 (no trailing newline) + lineSize := (tt.textSize + 1) / tt.linesCount // Adjust for newlines + if lineSize < 1 { + lineSize = 1 + } line := strings.Repeat("x", lineSize) lines := make([]string, tt.linesCount) for i := range lines { From d1dacde30519a4c4f950a456a10cba549ae89037 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 19:11:05 +0000 Subject: [PATCH 6/6] Move constants to compiler.go and add data integrity tests Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/compiler.go | 8 ++ pkg/workflow/sh.go | 7 +- 
pkg/workflow/sh_integration_test.go | 211 ++++++++++++++++++++++++++++ 3 files changed, 221 insertions(+), 5 deletions(-) diff --git a/pkg/workflow/compiler.go b/pkg/workflow/compiler.go index fe8584c31e3..7ef27ec662c 100644 --- a/pkg/workflow/compiler.go +++ b/pkg/workflow/compiler.go @@ -26,6 +26,14 @@ const ( // This includes environment variable values, if conditions, and other expression contexts // See: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration MaxExpressionSize = 21000 // 21KB in bytes + + // MaxPromptChunkSize is the maximum size for each chunk when splitting prompt text (20KB) + // This limit ensures each heredoc block stays under GitHub Actions step size limits (21KB) + MaxPromptChunkSize = 20000 // 20KB limit for each chunk + + // MaxPromptChunks is the maximum number of chunks allowed when splitting prompt text + // This prevents excessive step generation for extremely large prompt texts + MaxPromptChunks = 5 // Maximum number of chunks ) //go:embed schemas/github-workflow.json diff --git a/pkg/workflow/sh.go b/pkg/workflow/sh.go index 14de69c13c9..9f93dc732cd 100644 --- a/pkg/workflow/sh.go +++ b/pkg/workflow/sh.go @@ -60,14 +60,11 @@ func WriteShellScriptToYAML(yaml *strings.Builder, script string, indent string) } // WritePromptTextToYAML writes prompt text to a YAML heredoc with proper indentation. -// It chunks the text into groups of lines of less than 20000 characters, with a maximum of 5 chunks. +// It chunks the text into groups of lines of less than MaxPromptChunkSize characters, with a maximum of MaxPromptChunks chunks. // Each chunk is written as a separate heredoc to avoid GitHub Actions step size limits (21KB). 
func WritePromptTextToYAML(yaml *strings.Builder, text string, indent string) { - const maxChunkSize = 20000 // 20KB limit for each chunk - const maxChunks = 5 // Maximum number of chunks - textLines := strings.Split(text, "\n") - chunks := chunkLines(textLines, indent, maxChunkSize, maxChunks) + chunks := chunkLines(textLines, indent, MaxPromptChunkSize, MaxPromptChunks) // Write each chunk as a separate heredoc for _, chunk := range chunks { diff --git a/pkg/workflow/sh_integration_test.go b/pkg/workflow/sh_integration_test.go index 5f5a708fa20..14b81fe2171 100644 --- a/pkg/workflow/sh_integration_test.go +++ b/pkg/workflow/sh_integration_test.go @@ -67,6 +67,29 @@ func TestWritePromptTextToYAML_IntegrationWithCompiler(t *testing.T) { } t.Logf("Successfully chunked %d bytes into %d heredoc blocks", totalSize, heredocCount) + + // Verify no lines are lost - extract content from heredoc blocks and compare + extractedLines := extractLinesFromYAML(result, indent) + originalLines := strings.Split(largePromptText, "\n") + + if len(extractedLines) != len(originalLines) { + t.Errorf("Line count mismatch: expected %d lines, got %d lines", len(originalLines), len(extractedLines)) + } + + // Verify content integrity by checking line-by-line + mismatchCount := 0 + for i := 0; i < len(originalLines) && i < len(extractedLines); i++ { + if originalLines[i] != extractedLines[i] { + mismatchCount++ + if mismatchCount <= 3 { // Only report first 3 mismatches + t.Errorf("Line %d mismatch:\nExpected: %q\nGot: %q", i+1, originalLines[i], extractedLines[i]) + } + } + } + + if mismatchCount > 0 { + t.Errorf("Total line mismatches: %d", mismatchCount) + } } // TestWritePromptTextToYAML_RealWorldSizeSimulation simulates various real-world scenarios @@ -153,6 +176,194 @@ func TestWritePromptTextToYAML_RealWorldSizeSimulation(t *testing.T) { } t.Logf("%s: %d bytes chunked into %d blocks", tt.name, len(text), heredocCount) + + // Verify no lines are lost + extractedLines := 
extractLinesFromYAML(result, indent) + originalLines := strings.Split(text, "\n") + + if len(extractedLines) != len(originalLines) { + t.Errorf("%s: Line count mismatch - expected %d lines, got %d lines", tt.name, len(originalLines), len(extractedLines)) + } + }) + } +} + +// extractLinesFromYAML returns, in order, the lines found between each "cat >> $GITHUB_AW_PROMPT << 'EOF'" opener and the next line that trims to "EOF" in yamlOutput, with the leading indent removed. +// Lines inside a heredoc that do not start with indent are skipped, and a content line that is just "EOF" would be treated as a terminator. +func extractLinesFromYAML(yamlOutput string, indent string) []string { + var lines []string + inHeredoc := false + + for _, line := range strings.Split(yamlOutput, "\n") { + // The opener line itself is not content; start collecting on the next line + if strings.Contains(line, "cat >> $GITHUB_AW_PROMPT << 'EOF'") { + inHeredoc = true + continue + } + + // A line that is only EOF (ignoring surrounding whitespace) closes the current block + if strings.TrimSpace(line) == "EOF" { + inHeredoc = false + continue + } + + // If we're in a heredoc block, extract the content line + if inHeredoc { + // Remove the indent from the line; lines lacking the indent prefix are dropped + if strings.HasPrefix(line, indent) { + contentLine := strings.TrimPrefix(line, indent) + lines = append(lines, contentLine) + } + } + } + + return lines +} + +// TestWritePromptTextToYAML_NoDataLoss verifies that no lines or chunks are lost +// during the chunking process, even with edge cases.
+func TestWritePromptTextToYAML_NoDataLoss(t *testing.T) { + tests := []struct { + name string + lines []string + expectLoss bool + }{ + { + name: "single line", + lines: []string{"Single line of text"}, + expectLoss: false, + }, + { + name: "multiple short lines", + lines: []string{"Line 1", "Line 2", "Line 3", "Line 4", "Line 5"}, + expectLoss: false, + }, + { + name: "empty lines", + lines: []string{"Line 1", "", "Line 3", "", "Line 5"}, + expectLoss: false, + }, + { + name: "very long single line", + lines: []string{strings.Repeat("x", 25000)}, + expectLoss: false, + }, + { + name: "exactly at chunk boundary", + lines: func() []string { + // 180 lines of 100 bytes (18000 bytes of content), meant to land near the 20000-byte chunk limit once per-line indent and newline are added; the total is approximate, not exact + line := strings.Repeat("x", 100) + lines := make([]string, 180) + for i := range lines { + lines[i] = line + } + return lines + }(), + expectLoss: false, + }, + { + name: "large number of lines requiring max chunks", // NOTE(review): 600 lines of 1000 bytes is far more than MaxPromptChunks * MaxPromptChunkSize (5 * 20000 bytes) - confirm the last chunk is meant to absorb the overflow + lines: func() []string { + line := strings.Repeat("y", 1000) + lines := make([]string, 600) + for i := range lines { + lines[i] = line + } + return lines + }(), + expectLoss: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + text := strings.Join(tt.lines, "\n") + var yaml strings.Builder + indent := " " + + WritePromptTextToYAML(&yaml, text, indent) + + result := yaml.String() + + // Extract lines from the YAML output + extractedLines := extractLinesFromYAML(result, indent) + + // Verify line count: every case must round-trip all of its lines (the expectLoss field is never consulted here) + if len(extractedLines) != len(tt.lines) { + t.Errorf("Line count mismatch: expected %d lines, got %d lines", len(tt.lines), len(extractedLines)) + t.Logf("Original lines: %d", len(tt.lines)) + t.Logf("Extracted lines: %d", len(extractedLines)) + } + + // Verify content integrity line by line, reporting only the first 3 mismatches in detail + mismatchCount := 0 + for i := 0; i < len(tt.lines) && i < len(extractedLines); i++ { + if tt.lines[i] != extractedLines[i] { + mismatchCount++ + if mismatchCount <= 3 { + t.Errorf("Line %d mismatch:\nExpected: %q\nGot: %q", i+1, tt.lines[i], 
extractedLines[i]) + } + } + } + + if mismatchCount > 0 { + t.Errorf("Total line mismatches: %d", mismatchCount) + } }) } } + +// TestWritePromptTextToYAML_ChunkIntegrity verifies that chunks are properly formed +// and that the chunking process maintains data integrity. +func TestWritePromptTextToYAML_ChunkIntegrity(t *testing.T) { + // Create a large text that will require multiple chunks + line := strings.Repeat("Test line with some content. ", 50) + lines := make([]string, 300) + for i := range lines { + lines[i] = line + } + text := strings.Join(lines, "\n") + + var yaml strings.Builder + indent := " " + + WritePromptTextToYAML(&yaml, text, indent) + + result := yaml.String() + + // Count heredoc blocks + heredocCount := strings.Count(result, "cat >> $GITHUB_AW_PROMPT << 'EOF'") + + t.Logf("Created %d heredoc blocks for %d lines (%d bytes)", heredocCount, len(lines), len(text)) + + // Verify we have multiple chunks but not exceeding max + if heredocCount < 2 { + t.Errorf("Expected multiple chunks for large text, got %d", heredocCount) + } + + if heredocCount > MaxPromptChunks { + t.Errorf("Expected at most %d chunks, got %d", MaxPromptChunks, heredocCount) + } + + // Verify all heredocs are properly closed + eofCount := strings.Count(result, indent+"EOF") + if eofCount != heredocCount { + t.Errorf("Heredoc closure mismatch: %d opens, %d closes", heredocCount, eofCount) + } + + // Verify no data loss + extractedLines := extractLinesFromYAML(result, indent) + if len(extractedLines) != len(lines) { + t.Errorf("Line count mismatch: expected %d, got %d", len(lines), len(extractedLines)) + } + + // Spot-check content at fixed positions (first, quartiles, last); every input line is identical, so this catches corruption or truncation but not reordering + sampleIndices := []int{0, len(lines) / 4, len(lines) / 2, len(lines) * 3 / 4, len(lines) - 1} + for _, idx := range sampleIndices { + if idx < len(lines) && idx < len(extractedLines) { + if lines[idx] != extractedLines[idx] { + t.Errorf("Content mismatch at line %d:\nExpected: %q\nGot: %q", idx+1, lines[idx], 
extractedLines[idx]) + } + } + } +}