diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 82c744a3..edb804b8 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -228,8 +228,10 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan ) (*GenerateWithLoopResult, error) { // Fantasy requires the current user input as Prompt, with prior messages as history. - // Extract the last user message text as the prompt, and pass everything before it as Messages. - prompt, history := splitPromptAndHistory(messages) + // Extract the last user message text and files as the prompt, and pass everything + // before it as Messages. Files (e.g. clipboard images) are passed via the Files + // field so Fantasy includes them in the API request. + prompt, files, history := splitPromptAndHistory(messages) // Track current tool call info for callbacks var currentToolName string @@ -246,6 +248,7 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan // Use fantasy's streaming agent result, err := a.fantasyAgent.Stream(ctx, fantasy.AgentStreamCall{ Prompt: prompt, + Files: files, Messages: history, // Reasoning/thinking streaming callback @@ -340,6 +343,7 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan // Non-streaming path with no callbacks — use the simpler Generate call. result, err := a.fantasyAgent.Generate(ctx, fantasy.AgentCall{ Prompt: prompt, + Files: files, Messages: history, }) if err != nil { @@ -360,27 +364,32 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan // and returns everything before it as conversation history. Fantasy's agent // requires the current turn's input as Prompt (string), with prior messages // passed separately as Messages (history). -func splitPromptAndHistory(messages []fantasy.Message) (string, []fantasy.Message) { +func splitPromptAndHistory(messages []fantasy.Message) (string, []fantasy.FilePart, []fantasy.Message) { if len(messages) == 0 { - return "", nil + return "", nil, nil } // Walk backwards to find the last user message for i := len(messages) - 1; i >= 0; i-- { if messages[i].Role == fantasy.MessageRoleUser { - // Extract text from the user message parts + // Extract text and file parts from the user message var prompt string + var files []fantasy.FilePart for _, part := range messages[i].Content { - if tp, ok := part.(fantasy.TextPart); ok { - prompt = tp.Text - break + switch p := part.(type) { + case fantasy.TextPart: + if prompt == "" { + prompt = p.Text + } + case fantasy.FilePart: + files = append(files, p) } } // History is everything except this last user message history := make([]fantasy.Message, 0, len(messages)-1) history = append(history, messages[:i]...) history = append(history, messages[i+1:]...) - return prompt, history + return prompt, files, history } } @@ -388,11 +397,11 @@ func splitPromptAndHistory(messages []fantasy.Message) (string, []fantasy.Messag last := messages[len(messages)-1] for _, part := range last.Content { if tp, ok := part.(fantasy.TextPart); ok { - return tp.Text, messages[:len(messages)-1] + return tp.Text, nil, messages[:len(messages)-1] } } - return "", messages + return "", nil, messages } // convertAgentResult converts a fantasy AgentResult to our GenerateWithLoopResult. diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go index b07dd4ed..19a7c961 100644 --- a/pkg/kit/kit.go +++ b/pkg/kit/kit.go @@ -1222,10 +1222,12 @@ func (m *Kit) runTurn(ctx context.Context, promptLabel string, prompt string, pr // block, and appends any trailing user args. if expanded := m.expandSkillCommand(prompt); expanded != prompt { prompt = expanded - // Replace the last user message in preMessages with the expanded text. + // Replace the last user message in preMessages with the expanded text, + // preserving any file parts (e.g. clipboard images). for i := len(preMessages) - 1; i >= 0; i-- { if preMessages[i].Role == fantasy.MessageRoleUser { - preMessages[i] = fantasy.NewUserMessage(expanded) + files := extractFileParts(preMessages[i]) + preMessages[i] = fantasy.NewUserMessage(expanded, files...) break } } @@ -1234,11 +1236,13 @@ func (m *Kit) runTurn(ctx context.Context, promptLabel string, prompt string, pr // Run BeforeTurn hooks — can modify the prompt, inject system/context messages. if m.beforeTurn.hasHooks() { if hookResult := m.beforeTurn.run(BeforeTurnHook{Prompt: prompt}); hookResult != nil { - // Override prompt text in the last user message. + // Override prompt text in the last user message, preserving + // any file parts (e.g. clipboard images). if hookResult.Prompt != nil { for i := len(preMessages) - 1; i >= 0; i-- { if preMessages[i].Role == fantasy.MessageRoleUser { - preMessages[i] = fantasy.NewUserMessage(*hookResult.Prompt) + files := extractFileParts(preMessages[i]) + preMessages[i] = fantasy.NewUserMessage(*hookResult.Prompt, files...) break } } @@ -1527,6 +1531,18 @@ func (m *Kit) GetTools() []Tool { return m.agent.GetTools() } +// extractFileParts returns all FilePart entries from a message's Content. +// Used to preserve image attachments when replacing user message text. +func extractFileParts(msg fantasy.Message) []fantasy.FilePart { + var files []fantasy.FilePart + for _, part := range msg.Content { + if fp, ok := part.(fantasy.FilePart); ok { + files = append(files, fp) + } + } + return files +} + // Close cleans up resources including MCP server connections, model resources, // and the tree session file handle. Should be called when the Kit instance is // no longer needed. Returns an error if cleanup fails.