fix(mcp): surface MCP tool failures as soft errors, not critical aborts

The MCP adapter previously wrapped any error returned by MCPToolManager.ExecuteTool into a Go error returned from the fantasy.AgentTool.Run interface. The fantasy agent loop treats those as critical errors and aborts the entire turn — discarding all prior reasoning, tool calls, and results. In practice that meant a single misbehaved MCP server returning a JSON-RPC "-32602 Invalid params" (e.g. a Zod schema mismatch on the server's input validation) would kill an in-progress turn after the model had already done dozens of seconds of useful work, with no way for the model to see the validation message and self-correct. This mismatched the contract that native Kit tools follow: native tools return errors via kit.ErrorResult(...), which become soft tool-result errors that the model reads and can act on (retry with corrected args, try a different tool, give up gracefully). Make the MCP path behave the same way: - JSON-RPC protocol errors, transport failures, and server-side schema rejections are now returned as fantasy.NewTextErrorResponse(...) with err == nil, so the agent loop continues and the model sees the failure in-band as a tool result it can reason about. - Context cancellation (ctx.Err() != nil) remains a critical error so callers can abort turns deterministically. This is the only case where bubbling up is correct — the caller intentionally tore the turn down and the agent must not keep spinning. - Server-side soft errors (CallToolResult{ isError: true }) and the happy path are unchanged. The agent loop's MaxSteps cap already bounds the worst case for a permanently broken MCP server, so there is no risk of unbounded retries. Side effect: extracted a tiny mcpExecutor interface for the one method the adapter uses (ExecuteTool), purely so the adapter is unit-testable in isolation without standing up a full MCPToolManager + connection pool. Behavior change note for downstream consumers: code that relied on host.PromptResult / Stream returning a Go error containing "mcp tool execution failed" will no longer see those errors — the failure information is now in the assistant's final response (or in the OnAfterToolResult / OnToolResult hooks, where IsError will be true). Context cancellation continues to surface as an error from those calls as before.
2026-06-14 03:30:26 +00:00 · 2026-05-13 19:48:13 +03:00
parent 35b9360d64
commit 70cd214175
2 changed files with 186 additions and 5 deletions
@@ -9,12 +9,19 @@ import (
 	"github.com/mark3labs/kit/internal/tools"
 )

+// mcpExecutor is the subset of *tools.MCPToolManager that the adapter
+// actually uses. Extracted as an interface so the adapter is unit-testable
+// without constructing a full manager + connection pool.
+type mcpExecutor interface {
+	ExecuteTool(ctx context.Context, prefixedName, inputJSON string) (*tools.MCPToolResult, error)
+}
+
 // mcpAgentTool adapts an tools.MCPTool to the fantasy.AgentTool interface.
 // This keeps the fantasy dependency confined to the agent layer — the tools
 // package is a pure MCP client library with no LLM framework dependency.
 type mcpAgentTool struct {
 	tool            tools.MCPTool
-	manager         *tools.MCPToolManager
+	exec            mcpExecutor
 	providerOptions fantasy.ProviderOptions
 }

@@ -29,10 +36,26 @@ func (t *mcpAgentTool) Info() fantasy.ToolInfo {
 }

 // Run executes the MCP tool by delegating to the MCPToolManager.
+//
+// MCP-side failures (JSON-RPC protocol errors, transport failures, schema
+// validation rejections from the server) are surfaced to the model as soft
+// tool errors rather than escalated to a critical agent error. This matches
+// the contract that native Kit tools follow via kit.ErrorResult(...) and
+// lets the model self-correct (e.g. retry with a fixed argument shape) or
+// give up gracefully rather than aborting the turn mid-run.
+//
+// Context cancellation is the one exception: if the caller cancelled the
+// context the turn was aborted intentionally, so we propagate the ctx error
+// to let the agent loop unwind cleanly.
 func (t *mcpAgentTool) Run(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
-	result, err := t.manager.ExecuteTool(ctx, t.tool.Name, call.Input)
+	result, err := t.exec.ExecuteTool(ctx, t.tool.Name, call.Input)
 	if err != nil {
-		return fantasy.ToolResponse{}, fmt.Errorf("mcp tool execution failed: %w", err)
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			return fantasy.ToolResponse{}, ctxErr
+		}
+		return fantasy.NewTextErrorResponse(
+			fmt.Sprintf("MCP tool %q failed: %s", t.tool.Name, err.Error()),
+		), nil
 	}

 	if result.IsError {
@@ -57,8 +80,8 @@ func mcpToolsToAgentTools(mcpTools []tools.MCPTool, manager *tools.MCPToolManage
 	agentTools := make([]fantasy.AgentTool, len(mcpTools))
 	for i, t := range mcpTools {
 		agentTools[i] = &mcpAgentTool{
-			tool:    t,
-			manager: manager,
+			tool: t,
+			exec: manager,
 		}
 	}
 	return agentTools
@@ -0,0 +1,158 @@
+package agent
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"testing"
+	"time"
+
+	"charm.land/fantasy"
+
+	"github.com/mark3labs/kit/internal/tools"
+)
+
+// stubExecutor lets each test script the (result, err) pair returned by
+// ExecuteTool. The adapter holds an mcpExecutor interface, so this is the
+// only seam the tests need.
+type stubExecutor struct {
+	result *tools.MCPToolResult
+	err    error
+	// called records the last invocation for assertion.
+	called bool
+	name   string
+	input  string
+}
+
+func (s *stubExecutor) ExecuteTool(_ context.Context, prefixedName, inputJSON string) (*tools.MCPToolResult, error) {
+	s.called = true
+	s.name = prefixedName
+	s.input = inputJSON
+	return s.result, s.err
+}
+
+func newMCPAgentTool(exec mcpExecutor, name string) *mcpAgentTool {
+	return &mcpAgentTool{
+		tool: tools.MCPTool{Name: name},
+		exec: exec,
+	}
+}
+
+// Manager-side Go errors (JSON-RPC protocol errors, transport failures,
+// schema validation rejections from the MCP server) must be surfaced to
+// the model as soft tool errors so the agent loop can keep going. Aborting
+// the turn would discard all prior tool results — see issue #N.
+func TestMCPAgentTool_RPCErrorBecomesSoftError(t *testing.T) {
+	exec := &stubExecutor{
+		err: errors.New("MCP error -32602: Invalid params: missing field \"task\""),
+	}
+	tool := newMCPAgentTool(exec, "pubmed__search")
+
+	resp, err := tool.Run(context.Background(), fantasy.ToolCall{
+		ID:    "call-1",
+		Name:  "pubmed__search",
+		Input: `{"query":"foo"}`,
+	})
+
+	if err != nil {
+		t.Fatalf("expected nil error (soft), got %v", err)
+	}
+	if !resp.IsError {
+		t.Fatalf("expected IsError=true, got false")
+	}
+	if !strings.Contains(resp.Content, "pubmed__search") {
+		t.Errorf("expected tool name in error content, got %q", resp.Content)
+	}
+	if !strings.Contains(resp.Content, "-32602") {
+		t.Errorf("expected underlying error text in content, got %q", resp.Content)
+	}
+}
+
+// Context cancellation is the one error that must remain critical: it
+// means the caller intentionally aborted, and the agent loop needs to
+// unwind cleanly rather than burning more steps.
+func TestMCPAgentTool_CtxCancelStaysCritical(t *testing.T) {
+	exec := &stubExecutor{
+		// Real managers typically return ctx.Err() (or a wrapper) when the
+		// context is cancelled mid-call.
+		err: context.Canceled,
+	}
+	tool := newMCPAgentTool(exec, "slow__tool")
+
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	resp, err := tool.Run(ctx, fantasy.ToolCall{Name: "slow__tool"})
+
+	if !errors.Is(err, context.Canceled) {
+		t.Fatalf("expected context.Canceled, got %v", err)
+	}
+	if resp.IsError || resp.Content != "" {
+		t.Errorf("expected empty response on critical error, got IsError=%v Content=%q", resp.IsError, resp.Content)
+	}
+}
+
+// Deadline-exceeded behaves the same as cancellation: ctx.Err() is
+// non-nil, so the adapter must propagate the critical error rather than
+// converting the executor's error into a soft response.
+func TestMCPAgentTool_CtxDeadlineStaysCritical(t *testing.T) {
+	exec := &stubExecutor{err: context.DeadlineExceeded}
+	tool := newMCPAgentTool(exec, "slow__tool")
+
+	ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(-time.Second))
+	defer cancel()
+
+	resp, err := tool.Run(ctx, fantasy.ToolCall{Name: "slow__tool"})
+	if !errors.Is(err, context.DeadlineExceeded) {
+		t.Fatalf("expected context.DeadlineExceeded, got %v", err)
+	}
+	if resp.IsError || resp.Content != "" {
+		t.Errorf("expected empty response on critical error, got IsError=%v Content=%q", resp.IsError, resp.Content)
+	}
+}
+
+// Server-side soft errors (CallToolResult{ isError: true }) must continue
+// to flow through as soft errors — this was the existing behavior and
+// must not regress.
+func TestMCPAgentTool_ServerIsErrorRemainsSoftError(t *testing.T) {
+	exec := &stubExecutor{
+		result: &tools.MCPToolResult{
+			IsError: true,
+			Content: "search service is rate limited; try again in 30s",
+		},
+	}
+	tool := newMCPAgentTool(exec, "pubmed__search")
+
+	resp, err := tool.Run(context.Background(), fantasy.ToolCall{Name: "pubmed__search"})
+	if err != nil {
+		t.Fatalf("expected nil error, got %v", err)
+	}
+	if !resp.IsError {
+		t.Fatalf("expected IsError=true, got false")
+	}
+	if resp.Content != "search service is rate limited; try again in 30s" {
+		t.Errorf("expected pass-through content, got %q", resp.Content)
+	}
+}
+
+// Happy path: ordinary successful tool result is passed through unchanged.
+func TestMCPAgentTool_SuccessIsPassthrough(t *testing.T) {
+	exec := &stubExecutor{
+		result: &tools.MCPToolResult{
+			IsError: false,
+			Content: `{"hits":3}`,
+		},
+	}
+	tool := newMCPAgentTool(exec, "pubmed__search")
+
+	resp, err := tool.Run(context.Background(), fantasy.ToolCall{Name: "pubmed__search"})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if resp.IsError {
+		t.Fatalf("expected IsError=false")
+	}
+	if resp.Content != `{"hits":3}` {
+		t.Errorf("expected pass-through content, got %q", resp.Content)
+	}
+}