fix(mcp): surface MCP tool failures as soft errors, not critical aborts

The MCP adapter previously wrapped any error returned by MCPToolManager.ExecuteTool
into a Go error returned from the fantasy.AgentTool.Run interface. The fantasy
agent loop treats those as critical errors and aborts the entire turn —
discarding all prior reasoning, tool calls, and results.

In practice that meant a single misbehaved MCP server returning a JSON-RPC
"-32602 Invalid params" (e.g. a Zod schema mismatch on the server's input
validation) would kill an in-progress turn after the model had already done
dozens of seconds of useful work, with no way for the model to see the
validation message and self-correct.

This mismatched the contract that native Kit tools follow: native tools
return errors via kit.ErrorResult(...), which become soft tool-result errors
that the model reads and can act on (retry with corrected args, try a
different tool, give up gracefully).

Make the MCP path behave the same way:

  - JSON-RPC protocol errors, transport failures, and server-side schema
    rejections are now returned as fantasy.NewTextErrorResponse(...) with
    err == nil, so the agent loop continues and the model sees the failure
    in-band as a tool result it can reason about.
  - Context cancellation (ctx.Err() != nil) remains a critical error so
    callers can abort turns deterministically. This is the only case where
    bubbling up is correct — the caller intentionally tore the turn down
    and the agent must not keep spinning.
  - Server-side soft errors (CallToolResult{ isError: true }) and the
    happy path are unchanged.

The agent loop's MaxSteps cap already bounds the worst case for a
permanently broken MCP server, so there is no risk of unbounded retries.

Side effect: extracted a tiny mcpExecutor interface for the one method the
adapter uses (ExecuteTool), purely so the adapter is unit-testable in
isolation without standing up a full MCPToolManager + connection pool.

Behavior change note for downstream consumers: code that relied on
host.PromptResult / Stream returning a Go error containing
"mcp tool execution failed" will no longer see those errors — the
failure information is now in the assistant's final response (or in the
OnAfterToolResult / OnToolResult hooks, where IsError will be true).
Context cancellation continues to surface as an error from those calls
as before.
This commit is contained in:
space_cowboy
2026-05-13 19:48:13 +03:00
parent 35b9360d64
commit 70cd214175
2 changed files with 186 additions and 5 deletions
+28 -5
View File
@@ -9,12 +9,19 @@ import (
"github.com/mark3labs/kit/internal/tools"
)
// mcpExecutor is the subset of *tools.MCPToolManager that the adapter
// actually uses. Extracted as an interface so the adapter is unit-testable
// without constructing a full manager + connection pool.
type mcpExecutor interface {
ExecuteTool(ctx context.Context, prefixedName, inputJSON string) (*tools.MCPToolResult, error)
}
// mcpAgentTool adapts an tools.MCPTool to the fantasy.AgentTool interface.
// This keeps the fantasy dependency confined to the agent layer — the tools
// package is a pure MCP client library with no LLM framework dependency.
type mcpAgentTool struct {
tool tools.MCPTool
manager *tools.MCPToolManager
exec mcpExecutor
providerOptions fantasy.ProviderOptions
}
@@ -29,10 +36,26 @@ func (t *mcpAgentTool) Info() fantasy.ToolInfo {
}
// Run executes the MCP tool by delegating to the MCPToolManager.
//
// MCP-side failures (JSON-RPC protocol errors, transport failures, schema
// validation rejections from the server) are surfaced to the model as soft
// tool errors rather than escalated to a critical agent error. This matches
// the contract that native Kit tools follow via kit.ErrorResult(...) and
// lets the model self-correct (e.g. retry with a fixed argument shape) or
// give up gracefully rather than aborting the turn mid-run.
//
// Context cancellation is the one exception: if the caller cancelled the
// context the turn was aborted intentionally, so we propagate the ctx error
// to let the agent loop unwind cleanly.
func (t *mcpAgentTool) Run(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
result, err := t.manager.ExecuteTool(ctx, t.tool.Name, call.Input)
result, err := t.exec.ExecuteTool(ctx, t.tool.Name, call.Input)
if err != nil {
return fantasy.ToolResponse{}, fmt.Errorf("mcp tool execution failed: %w", err)
if ctxErr := ctx.Err(); ctxErr != nil {
return fantasy.ToolResponse{}, ctxErr
}
return fantasy.NewTextErrorResponse(
fmt.Sprintf("MCP tool %q failed: %s", t.tool.Name, err.Error()),
), nil
}
if result.IsError {
@@ -57,8 +80,8 @@ func mcpToolsToAgentTools(mcpTools []tools.MCPTool, manager *tools.MCPToolManage
agentTools := make([]fantasy.AgentTool, len(mcpTools))
for i, t := range mcpTools {
agentTools[i] = &mcpAgentTool{
tool: t,
manager: manager,
tool: t,
exec: manager,
}
}
return agentTools
+158
View File
@@ -0,0 +1,158 @@
package agent
import (
"context"
"errors"
"strings"
"testing"
"time"
"charm.land/fantasy"
"github.com/mark3labs/kit/internal/tools"
)
// stubExecutor lets each test script the (result, err) pair returned by
// ExecuteTool. The adapter holds an mcpExecutor interface, so this is the
// only seam the tests need.
type stubExecutor struct {
result *tools.MCPToolResult
err error
// called records the last invocation for assertion.
called bool
name string
input string
}
func (s *stubExecutor) ExecuteTool(_ context.Context, prefixedName, inputJSON string) (*tools.MCPToolResult, error) {
s.called = true
s.name = prefixedName
s.input = inputJSON
return s.result, s.err
}
func newMCPAgentTool(exec mcpExecutor, name string) *mcpAgentTool {
return &mcpAgentTool{
tool: tools.MCPTool{Name: name},
exec: exec,
}
}
// Manager-side Go errors (JSON-RPC protocol errors, transport failures,
// schema validation rejections from the MCP server) must be surfaced to
// the model as soft tool errors so the agent loop can keep going. Aborting
// the turn would discard all prior tool results — see issue #N.
func TestMCPAgentTool_RPCErrorBecomesSoftError(t *testing.T) {
exec := &stubExecutor{
err: errors.New("MCP error -32602: Invalid params: missing field \"task\""),
}
tool := newMCPAgentTool(exec, "pubmed__search")
resp, err := tool.Run(context.Background(), fantasy.ToolCall{
ID: "call-1",
Name: "pubmed__search",
Input: `{"query":"foo"}`,
})
if err != nil {
t.Fatalf("expected nil error (soft), got %v", err)
}
if !resp.IsError {
t.Fatalf("expected IsError=true, got false")
}
if !strings.Contains(resp.Content, "pubmed__search") {
t.Errorf("expected tool name in error content, got %q", resp.Content)
}
if !strings.Contains(resp.Content, "-32602") {
t.Errorf("expected underlying error text in content, got %q", resp.Content)
}
}
// Context cancellation is the one error that must remain critical: it
// means the caller intentionally aborted, and the agent loop needs to
// unwind cleanly rather than burning more steps.
func TestMCPAgentTool_CtxCancelStaysCritical(t *testing.T) {
exec := &stubExecutor{
// Real managers typically return ctx.Err() (or a wrapper) when the
// context is cancelled mid-call.
err: context.Canceled,
}
tool := newMCPAgentTool(exec, "slow__tool")
ctx, cancel := context.WithCancel(context.Background())
cancel()
resp, err := tool.Run(ctx, fantasy.ToolCall{Name: "slow__tool"})
if !errors.Is(err, context.Canceled) {
t.Fatalf("expected context.Canceled, got %v", err)
}
if resp.IsError || resp.Content != "" {
t.Errorf("expected empty response on critical error, got IsError=%v Content=%q", resp.IsError, resp.Content)
}
}
// Deadline-exceeded behaves the same as cancellation: ctx.Err() is
// non-nil, so the adapter must propagate the critical error rather than
// converting the executor's error into a soft response.
func TestMCPAgentTool_CtxDeadlineStaysCritical(t *testing.T) {
exec := &stubExecutor{err: context.DeadlineExceeded}
tool := newMCPAgentTool(exec, "slow__tool")
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(-time.Second))
defer cancel()
resp, err := tool.Run(ctx, fantasy.ToolCall{Name: "slow__tool"})
if !errors.Is(err, context.DeadlineExceeded) {
t.Fatalf("expected context.DeadlineExceeded, got %v", err)
}
if resp.IsError || resp.Content != "" {
t.Errorf("expected empty response on critical error, got IsError=%v Content=%q", resp.IsError, resp.Content)
}
}
// Server-side soft errors (CallToolResult{ isError: true }) must continue
// to flow through as soft errors — this was the existing behavior and
// must not regress.
func TestMCPAgentTool_ServerIsErrorRemainsSoftError(t *testing.T) {
exec := &stubExecutor{
result: &tools.MCPToolResult{
IsError: true,
Content: "search service is rate limited; try again in 30s",
},
}
tool := newMCPAgentTool(exec, "pubmed__search")
resp, err := tool.Run(context.Background(), fantasy.ToolCall{Name: "pubmed__search"})
if err != nil {
t.Fatalf("expected nil error, got %v", err)
}
if !resp.IsError {
t.Fatalf("expected IsError=true, got false")
}
if resp.Content != "search service is rate limited; try again in 30s" {
t.Errorf("expected pass-through content, got %q", resp.Content)
}
}
// Happy path: ordinary successful tool result is passed through unchanged.
func TestMCPAgentTool_SuccessIsPassthrough(t *testing.T) {
exec := &stubExecutor{
result: &tools.MCPToolResult{
IsError: false,
Content: `{"hits":3}`,
},
}
tool := newMCPAgentTool(exec, "pubmed__search")
resp, err := tool.Run(context.Background(), fantasy.ToolCall{Name: "pubmed__search"})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if resp.IsError {
t.Fatalf("expected IsError=false")
}
if resp.Content != `{"hits":3}` {
t.Errorf("expected pass-through content, got %q", resp.Content)
}
}