From c46687fc44d29b2b201ff05deb98c51ff9a060a9 Mon Sep 17 00:00:00 2001 From: Ed Zynda Date: Mon, 9 Mar 2026 10:26:31 +0300 Subject: [PATCH] fix: pass image file parts through Fantasy agent's Files field splitPromptAndHistory was extracting only text from the last user message, discarding FilePart data (clipboard images). The fix extracts both text and file parts, passing files via AgentStreamCall.Files and AgentCall.Files so Fantasy includes them in the API request. Also preserves file parts when BeforeTurn hooks or skill expansion replace the user message text in runTurn. --- internal/agent/agent.go | 31 ++++++++++++++++++++----------- pkg/kit/kit.go | 24 ++++++++++++++++++++---- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 82c744a3..edb804b8 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -228,8 +228,10 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan ) (*GenerateWithLoopResult, error) { // Fantasy requires the current user input as Prompt, with prior messages as history. - // Extract the last user message text as the prompt, and pass everything before it as Messages. - prompt, history := splitPromptAndHistory(messages) + // Extract the last user message text and files as the prompt, and pass everything + // before it as Messages. Files (e.g. clipboard images) are passed via the Files + // field so Fantasy includes them in the API request. + prompt, files, history := splitPromptAndHistory(messages) // Track current tool call info for callbacks var currentToolName string @@ -246,6 +248,7 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan // Use fantasy's streaming agent result, err := a.fantasyAgent.Stream(ctx, fantasy.AgentStreamCall{ Prompt: prompt, + Files: files, Messages: history, // Reasoning/thinking streaming callback @@ -340,6 +343,7 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan // Non-streaming path with no callbacks — use the simpler Generate call. result, err := a.fantasyAgent.Generate(ctx, fantasy.AgentCall{ Prompt: prompt, + Files: files, Messages: history, }) if err != nil { @@ -360,27 +364,32 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan // and returns everything before it as conversation history. Fantasy's agent // requires the current turn's input as Prompt (string), with prior messages // passed separately as Messages (history). -func splitPromptAndHistory(messages []fantasy.Message) (string, []fantasy.Message) { +func splitPromptAndHistory(messages []fantasy.Message) (string, []fantasy.FilePart, []fantasy.Message) { if len(messages) == 0 { - return "", nil + return "", nil, nil } // Walk backwards to find the last user message for i := len(messages) - 1; i >= 0; i-- { if messages[i].Role == fantasy.MessageRoleUser { - // Extract text from the user message parts + // Extract text and file parts from the user message var prompt string + var files []fantasy.FilePart for _, part := range messages[i].Content { - if tp, ok := part.(fantasy.TextPart); ok { - prompt = tp.Text - break + switch p := part.(type) { + case fantasy.TextPart: + if prompt == "" { + prompt = p.Text + } + case fantasy.FilePart: + files = append(files, p) } } // History is everything except this last user message history := make([]fantasy.Message, 0, len(messages)-1) history = append(history, messages[:i]...) history = append(history, messages[i+1:]...) - return prompt, history + return prompt, files, history } } @@ -388,11 +397,11 @@ func splitPromptAndHistory(messages []fantasy.Message) (string, []fantasy.Messag last := messages[len(messages)-1] for _, part := range last.Content { if tp, ok := part.(fantasy.TextPart); ok { - return tp.Text, messages[:len(messages)-1] + return tp.Text, nil, messages[:len(messages)-1] } } - return "", messages + return "", nil, messages } // convertAgentResult converts a fantasy AgentResult to our GenerateWithLoopResult. diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go index b07dd4ed..19a7c961 100644 --- a/pkg/kit/kit.go +++ b/pkg/kit/kit.go @@ -1222,10 +1222,12 @@ func (m *Kit) runTurn(ctx context.Context, promptLabel string, prompt string, pr // block, and appends any trailing user args. if expanded := m.expandSkillCommand(prompt); expanded != prompt { prompt = expanded - // Replace the last user message in preMessages with the expanded text. + // Replace the last user message in preMessages with the expanded text, + // preserving any file parts (e.g. clipboard images). for i := len(preMessages) - 1; i >= 0; i-- { if preMessages[i].Role == fantasy.MessageRoleUser { - preMessages[i] = fantasy.NewUserMessage(expanded) + files := extractFileParts(preMessages[i]) + preMessages[i] = fantasy.NewUserMessage(expanded, files...) break } } @@ -1234,11 +1236,13 @@ func (m *Kit) runTurn(ctx context.Context, promptLabel string, prompt string, pr // Run BeforeTurn hooks — can modify the prompt, inject system/context messages. if m.beforeTurn.hasHooks() { if hookResult := m.beforeTurn.run(BeforeTurnHook{Prompt: prompt}); hookResult != nil { - // Override prompt text in the last user message. + // Override prompt text in the last user message, preserving + // any file parts (e.g. clipboard images). if hookResult.Prompt != nil { for i := len(preMessages) - 1; i >= 0; i-- { if preMessages[i].Role == fantasy.MessageRoleUser { - preMessages[i] = fantasy.NewUserMessage(*hookResult.Prompt) + files := extractFileParts(preMessages[i]) + preMessages[i] = fantasy.NewUserMessage(*hookResult.Prompt, files...) break } } @@ -1527,6 +1531,18 @@ func (m *Kit) GetTools() []Tool { return m.agent.GetTools() } +// extractFileParts returns all FilePart entries from a message's Content. +// Used to preserve image attachments when replacing user message text. +func extractFileParts(msg fantasy.Message) []fantasy.FilePart { + var files []fantasy.FilePart + for _, part := range msg.Content { + if fp, ok := part.(fantasy.FilePart); ok { + files = append(files, fp) + } + } + return files +} + // Close cleans up resources including MCP server connections, model resources, // and the tree session file handle. Should be called when the Kit instance is // no longer needed. Returns an error if cleanup fails.