From 975c30a773e8d5dd438fd1bec4c2c39ab28dcac3 Mon Sep 17 00:00:00 2001 From: Ed Zynda Date: Wed, 13 May 2026 20:12:31 +0300 Subject: [PATCH] fix(mcp): surface MCP tool failures as soft errors, not critical aborts (#31) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MCP adapter previously wrapped any error returned by MCPToolManager.ExecuteTool into a Go error returned from the fantasy.AgentTool.Run interface. The fantasy agent loop treats those as critical errors and aborts the entire turn — discarding all prior reasoning, tool calls, and results. In practice that meant a single misbehaved MCP server returning a JSON-RPC "-32602 Invalid params" (e.g. a Zod schema mismatch on the server's input validation) would kill an in-progress turn after the model had already done dozens of seconds of useful work, with no way for the model to see the validation message and self-correct. This mismatched the contract that native Kit tools follow: native tools return errors via kit.ErrorResult(...), which become soft tool-result errors that the model reads and can act on (retry with corrected args, try a different tool, give up gracefully). Make the MCP path behave the same way: - JSON-RPC protocol errors, transport failures, and server-side schema rejections are now returned as fantasy.NewTextErrorResponse(...) with err == nil, so the agent loop continues and the model sees the failure in-band as a tool result it can reason about. - Context cancellation (ctx.Err() != nil) remains a critical error so callers can abort turns deterministically. This is the only case where bubbling up is correct — the caller intentionally tore the turn down and the agent must not keep spinning. - Server-side soft errors (CallToolResult{ isError: true }) and the happy path are unchanged. The agent loop's MaxSteps cap already bounds the worst case for a permanently broken MCP server, so there is no risk of unbounded retries. Side effect: extracted a tiny mcpExecutor interface for the one method the adapter uses (ExecuteTool), purely so the adapter is unit-testable in isolation without standing up a full MCPToolManager + connection pool. Behavior change note for downstream consumers: code that relied on host.PromptResult / Stream returning a Go error containing "mcp tool execution failed" will no longer see those errors — the failure information is now in the assistant's final response (or in the OnAfterToolResult / OnToolResult hooks, where IsError will be true). Context cancellation continues to surface as an error from those calls as before. Co-authored-by: space_cowboy --- internal/agent/mcp_adapter.go | 33 +++++- internal/agent/mcp_adapter_test.go | 158 +++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 5 deletions(-) create mode 100644 internal/agent/mcp_adapter_test.go diff --git a/internal/agent/mcp_adapter.go b/internal/agent/mcp_adapter.go index aef92832..50ef7b9e 100644 --- a/internal/agent/mcp_adapter.go +++ b/internal/agent/mcp_adapter.go @@ -9,12 +9,19 @@ import ( "github.com/mark3labs/kit/internal/tools" ) +// mcpExecutor is the subset of *tools.MCPToolManager that the adapter +// actually uses. Extracted as an interface so the adapter is unit-testable +// without constructing a full manager + connection pool. +type mcpExecutor interface { + ExecuteTool(ctx context.Context, prefixedName, inputJSON string) (*tools.MCPToolResult, error) +} + // mcpAgentTool adapts an tools.MCPTool to the fantasy.AgentTool interface. // This keeps the fantasy dependency confined to the agent layer — the tools // package is a pure MCP client library with no LLM framework dependency. type mcpAgentTool struct { tool tools.MCPTool - manager *tools.MCPToolManager + exec mcpExecutor providerOptions fantasy.ProviderOptions } @@ -29,10 +36,26 @@ func (t *mcpAgentTool) Info() fantasy.ToolInfo { } // Run executes the MCP tool by delegating to the MCPToolManager. +// +// MCP-side failures (JSON-RPC protocol errors, transport failures, schema +// validation rejections from the server) are surfaced to the model as soft +// tool errors rather than escalated to a critical agent error. This matches +// the contract that native Kit tools follow via kit.ErrorResult(...) and +// lets the model self-correct (e.g. retry with a fixed argument shape) or +// give up gracefully rather than aborting the turn mid-run. +// +// Context cancellation is the one exception: if the caller cancelled the +// context the turn was aborted intentionally, so we propagate the ctx error +// to let the agent loop unwind cleanly. func (t *mcpAgentTool) Run(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) { - result, err := t.manager.ExecuteTool(ctx, t.tool.Name, call.Input) + result, err := t.exec.ExecuteTool(ctx, t.tool.Name, call.Input) if err != nil { - return fantasy.ToolResponse{}, fmt.Errorf("mcp tool execution failed: %w", err) + if ctxErr := ctx.Err(); ctxErr != nil { + return fantasy.ToolResponse{}, ctxErr + } + return fantasy.NewTextErrorResponse( + fmt.Sprintf("MCP tool %q failed: %s", t.tool.Name, err.Error()), + ), nil } if result.IsError { @@ -57,8 +80,8 @@ func mcpToolsToAgentTools(mcpTools []tools.MCPTool, manager *tools.MCPToolManage agentTools := make([]fantasy.AgentTool, len(mcpTools)) for i, t := range mcpTools { agentTools[i] = &mcpAgentTool{ - tool: t, - manager: manager, + tool: t, + exec: manager, } } return agentTools diff --git a/internal/agent/mcp_adapter_test.go b/internal/agent/mcp_adapter_test.go new file mode 100644 index 00000000..b192d905 --- /dev/null +++ b/internal/agent/mcp_adapter_test.go @@ -0,0 +1,158 @@ +package agent + +import ( + "context" + "errors" + "strings" + "testing" + "time" + + "charm.land/fantasy" + + "github.com/mark3labs/kit/internal/tools" +) + +// stubExecutor lets each test script the (result, err) pair returned by +// ExecuteTool. The adapter holds an mcpExecutor interface, so this is the +// only seam the tests need. +type stubExecutor struct { + result *tools.MCPToolResult + err error + // called records the last invocation for assertion. + called bool + name string + input string +} + +func (s *stubExecutor) ExecuteTool(_ context.Context, prefixedName, inputJSON string) (*tools.MCPToolResult, error) { + s.called = true + s.name = prefixedName + s.input = inputJSON + return s.result, s.err +} + +func newMCPAgentTool(exec mcpExecutor, name string) *mcpAgentTool { + return &mcpAgentTool{ + tool: tools.MCPTool{Name: name}, + exec: exec, + } +} + +// Manager-side Go errors (JSON-RPC protocol errors, transport failures, +// schema validation rejections from the MCP server) must be surfaced to +// the model as soft tool errors so the agent loop can keep going. Aborting +// the turn would discard all prior tool results — see issue #N. +func TestMCPAgentTool_RPCErrorBecomesSoftError(t *testing.T) { + exec := &stubExecutor{ + err: errors.New("MCP error -32602: Invalid params: missing field \"task\""), + } + tool := newMCPAgentTool(exec, "pubmed__search") + + resp, err := tool.Run(context.Background(), fantasy.ToolCall{ + ID: "call-1", + Name: "pubmed__search", + Input: `{"query":"foo"}`, + }) + + if err != nil { + t.Fatalf("expected nil error (soft), got %v", err) + } + if !resp.IsError { + t.Fatalf("expected IsError=true, got false") + } + if !strings.Contains(resp.Content, "pubmed__search") { + t.Errorf("expected tool name in error content, got %q", resp.Content) + } + if !strings.Contains(resp.Content, "-32602") { + t.Errorf("expected underlying error text in content, got %q", resp.Content) + } +} + +// Context cancellation is the one error that must remain critical: it +// means the caller intentionally aborted, and the agent loop needs to +// unwind cleanly rather than burning more steps. +func TestMCPAgentTool_CtxCancelStaysCritical(t *testing.T) { + exec := &stubExecutor{ + // Real managers typically return ctx.Err() (or a wrapper) when the + // context is cancelled mid-call. + err: context.Canceled, + } + tool := newMCPAgentTool(exec, "slow__tool") + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + resp, err := tool.Run(ctx, fantasy.ToolCall{Name: "slow__tool"}) + + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected context.Canceled, got %v", err) + } + if resp.IsError || resp.Content != "" { + t.Errorf("expected empty response on critical error, got IsError=%v Content=%q", resp.IsError, resp.Content) + } +} + +// Deadline-exceeded behaves the same as cancellation: ctx.Err() is +// non-nil, so the adapter must propagate the critical error rather than +// converting the executor's error into a soft response. +func TestMCPAgentTool_CtxDeadlineStaysCritical(t *testing.T) { + exec := &stubExecutor{err: context.DeadlineExceeded} + tool := newMCPAgentTool(exec, "slow__tool") + + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(-time.Second)) + defer cancel() + + resp, err := tool.Run(ctx, fantasy.ToolCall{Name: "slow__tool"}) + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatalf("expected context.DeadlineExceeded, got %v", err) + } + if resp.IsError || resp.Content != "" { + t.Errorf("expected empty response on critical error, got IsError=%v Content=%q", resp.IsError, resp.Content) + } +} + +// Server-side soft errors (CallToolResult{ isError: true }) must continue +// to flow through as soft errors — this was the existing behavior and +// must not regress. +func TestMCPAgentTool_ServerIsErrorRemainsSoftError(t *testing.T) { + exec := &stubExecutor{ + result: &tools.MCPToolResult{ + IsError: true, + Content: "search service is rate limited; try again in 30s", + }, + } + tool := newMCPAgentTool(exec, "pubmed__search") + + resp, err := tool.Run(context.Background(), fantasy.ToolCall{Name: "pubmed__search"}) + if err != nil { + t.Fatalf("expected nil error, got %v", err) + } + if !resp.IsError { + t.Fatalf("expected IsError=true, got false") + } + if resp.Content != "search service is rate limited; try again in 30s" { + t.Errorf("expected pass-through content, got %q", resp.Content) + } +} + +// Happy path: ordinary successful tool result is passed through unchanged. +func TestMCPAgentTool_SuccessIsPassthrough(t *testing.T) { + exec := &stubExecutor{ + result: &tools.MCPToolResult{ + IsError: false, + Content: `{"hits":3}`, + }, + } + tool := newMCPAgentTool(exec, "pubmed__search") + + resp, err := tool.Run(context.Background(), fantasy.ToolCall{Name: "pubmed__search"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if resp.IsError { + t.Fatalf("expected IsError=false") + } + if resp.Content != `{"hits":3}` { + t.Errorf("expected pass-through content, got %q", resp.Content) + } +}