remove legacy app layer code: AgentRunner, executeStepLegacy, updateUsage

The app layer now exclusively uses the SDK path (kit.PromptResult). - Remove AgentRunner interface and Agent field from Options - Remove executeStepLegacy and updateUsage (SDK handles both) - Remove Extensions field from Options (SDK owns extension lifecycle) - Simplify StepCompleteEvent to carry ResponseText string only - Add PromptFunc test hook to Options for stub-based testing - Remove dead wrappers from cmd/setup.go (SetupAgent, BuildProviderConfig) - Update all tests to use stubPrompt/PromptFunc instead of stubAgent
2026-06-14 03:30:26 +00:00 · 2026-02-27 14:39:48 +03:00
parent 199297ce7e
commit 078daaf8ce
8 changed files with 109 additions and 424 deletions
@@ -395,7 +395,7 @@ func runNormalMode(ctx context.Context) error {

 	// Create the app.App instance.
 	extRunner := kitInstance.GetExtRunner()
-	appOpts := BuildAppOptions(mcpAgent, mcpConfig, modelName, serverNames, toolNames, extRunner)
+	appOpts := BuildAppOptions(mcpConfig, modelName, serverNames, toolNames)
 	appOpts.Kit = kitInstance
 	appOpts.TreeSession = treeSession

@@ -1,39 +1,16 @@
 package cmd

 import (
-	"context"
 	"strings"

 	"github.com/mark3labs/kit/internal/agent"
 	"github.com/mark3labs/kit/internal/app"
 	"github.com/mark3labs/kit/internal/config"
-	"github.com/mark3labs/kit/internal/extensions"
 	"github.com/mark3labs/kit/internal/ui"
 	kit "github.com/mark3labs/kit/pkg/kit"
 	"github.com/spf13/viper"
 )

-// AgentSetupOptions is the CLI-facing alias for kit.AgentSetupOptions.
-// The CLI adds the Quiet field from the package-level quietFlag.
-type AgentSetupOptions = kit.AgentSetupOptions
-
-// AgentSetupResult is the CLI-facing alias for kit.AgentSetupResult.
-type AgentSetupResult = kit.AgentSetupResult
-
-// BuildProviderConfig delegates to the SDK to build a ProviderConfig from
-// the current viper state.
-func BuildProviderConfig() (*kit.ProviderConfig, string, error) {
-	return kit.BuildProviderConfig()
-}
-
-// SetupAgent creates an agent from the current viper state. It delegates to
-// the SDK's SetupAgent, injecting the CLI-specific quietFlag.
-func SetupAgent(ctx context.Context, opts AgentSetupOptions) (*AgentSetupResult, error) {
-	// Inject CLI-specific quiet flag into the SDK options.
-	opts.Quiet = quietFlag
-	return kit.SetupAgent(ctx, opts)
-}
-
 // CollectAgentMetadata extracts model display info and tool/server name lists
 // from the agent, used to populate app.Options and UI setup.
 func CollectAgentMetadata(mcpAgent *agent.Agent, mcpConfig *config.Config) (provider, modelName string, serverNames, toolNames []string) {
@@ -56,9 +33,8 @@ func CollectAgentMetadata(mcpAgent *agent.Agent, mcpConfig *config.Config) (prov
 }

 // BuildAppOptions constructs the app.Options struct from the current state.
-func BuildAppOptions(mcpAgent *agent.Agent, mcpConfig *config.Config, modelName string, serverNames, toolNames []string, extRunner *extensions.Runner) app.Options {
+func BuildAppOptions(mcpConfig *config.Config, modelName string, serverNames, toolNames []string) app.Options {
 	return app.Options{
-		Agent:            mcpAgent,
 		MCPConfig:        mcpConfig,
 		ModelName:        modelName,
 		ServerNames:      serverNames,
@@ -67,7 +43,6 @@ func BuildAppOptions(mcpAgent *agent.Agent, mcpConfig *config.Config, modelName
 		Quiet:            quietFlag,
 		Debug:            viper.GetBool("debug"),
 		CompactMode:      viper.GetBool("compact"),
-		Extensions:       extRunner,
 	}
 }

@@ -8,7 +8,6 @@ import (
 	tea "charm.land/bubbletea/v2"
 	"charm.land/fantasy"

-	"github.com/mark3labs/kit/internal/agent"
 	"github.com/mark3labs/kit/internal/extensions"
 	"github.com/mark3labs/kit/internal/session"
 	kit "github.com/mark3labs/kit/pkg/kit"
@@ -190,18 +189,8 @@ func (a *App) RunOnce(ctx context.Context, prompt string) error {
 		return err
 	}

-	// Record token usage for the completed step (legacy path only — the SDK
-	// path records usage inside executeStep via updateUsageFromTurnResult).
-	if a.opts.Kit == nil {
-		a.updateUsage(result, prompt)
-	}
-
-	responseText := ""
-	if result.FinalResponse != nil {
-		responseText = result.FinalResponse.Content.Text()
-	}
-	if responseText != "" {
-		fmt.Println(responseText)
+	if result.Response != "" {
+		fmt.Println(result.Response)
 	}
 	return nil
 }
@@ -229,17 +218,9 @@ func (a *App) RunOnceWithDisplay(ctx context.Context, prompt string, eventFn fun
 		return err
 	}

-	// Record token usage (legacy path only — SDK path handles it in executeStep).
-	if a.opts.Kit == nil {
-		a.updateUsage(result, prompt)
-	}
-
 	// Send step complete so the display handler can render the final response.
-	if eventFn != nil && result.FinalResponse != nil {
-		eventFn(StepCompleteEvent{
-			Response: result.FinalResponse,
-			Usage:    result.TotalUsage,
-		})
+	if eventFn != nil {
+		eventFn(StepCompleteEvent{ResponseText: result.Response})
 	}

 	return nil
@@ -250,7 +231,7 @@ func (a *App) RunOnceWithDisplay(ctx context.Context, prompt string, eventFn fun
 // --------------------------------------------------------------------------

 // Close signals all background goroutines to stop and waits for them to finish.
-// After Close() returns it is safe to call agent.Close().
+// After Close() returns it is safe to call Kit.Close() / agent.Close().
 func (a *App) Close() {
 	a.mu.Lock()
 	if a.closed {
@@ -261,24 +242,12 @@ func (a *App) Close() {
 	cancel := a.cancelStep
 	a.mu.Unlock()

-	// SessionShutdown is emitted by Kit.Close() when the SDK path is active.
-	// For the legacy path, emit here.
-	if a.opts.Kit == nil && a.opts.Extensions != nil && a.opts.Extensions.HasHandlers(extensions.SessionShutdown) {
-		_, _ = a.opts.Extensions.Emit(extensions.SessionShutdownEvent{})
-	}
-
 	// Cancel any in-flight step and the root context.
 	cancel()
 	a.rootCancel()

 	// Wait for background goroutines.
 	a.wg.Wait()
-
-	// Close tree session file handle (legacy path only — Kit.Close() handles
-	// session cleanup when the SDK path is active).
-	if a.opts.Kit == nil && a.opts.TreeSession != nil {
-		_ = a.opts.TreeSession.Close()
-	}
 }

 // --------------------------------------------------------------------------
@@ -351,28 +320,20 @@ func (a *App) runPrompt(prompt string) {
 		return
 	}

-	// Record token usage (legacy path only — SDK path handles it in executeStep).
-	if a.opts.Kit == nil {
-		a.updateUsage(result, prompt)
-	}
-
-	a.sendEvent(StepCompleteEvent{
-		Response: result.FinalResponse,
-		Usage:    result.TotalUsage,
-	})
+	a.sendEvent(StepCompleteEvent{ResponseText: result.Response})
 }

 // --------------------------------------------------------------------------
 // Internal: single agent step
 // --------------------------------------------------------------------------

-// executeStep runs a single agentic step. When opts.Kit is set, it delegates
-// to the SDK's PromptResult() which handles session persistence, hooks,
-// extension events, and the generation loop. Otherwise it falls back to
-// executeStepLegacy() for tests that supply a stub AgentRunner.
-func (a *App) executeStep(ctx context.Context, prompt string, eventFn func(tea.Msg)) (*agent.GenerateWithLoopResult, error) {
-	if a.opts.Kit == nil {
-		return a.executeStepLegacy(ctx, prompt, eventFn)
+// executeStep runs a single agentic step by delegating to the SDK's
+// PromptResult(), which handles session persistence, hooks, extension
+// events, and the generation loop.
+func (a *App) executeStep(ctx context.Context, prompt string, eventFn func(tea.Msg)) (*kit.TurnResult, error) {
+	// Test hook: bypass SDK entirely.
+	if a.opts.PromptFunc != nil {
+		return a.opts.PromptFunc(ctx, prompt)
 	}

 	sendFn := func(msg tea.Msg) {
@@ -400,84 +361,6 @@ func (a *App) executeStep(ctx context.Context, prompt string, eventFn func(tea.M
 	// Update usage tracker.
 	a.updateUsageFromTurnResult(result, prompt)

-	return &agent.GenerateWithLoopResult{
-		ConversationMessages: result.Messages,
-	}, nil
-}
-
-// executeStepLegacy is the original executeStep implementation used when
-// opts.Kit is nil (e.g. in tests with a stub AgentRunner). It handles
-// extension events, session persistence, and the generation loop directly.
-func (a *App) executeStepLegacy(ctx context.Context, prompt string, eventFn func(tea.Msg)) (*agent.GenerateWithLoopResult, error) {
-	sendFn := func(msg tea.Msg) {
-		if eventFn != nil {
-			eventFn(msg)
-		}
-	}
-
-	// Add user message to the store immediately so history is consistent
-	// even if the step is later cancelled.
-	userMsg := fantasy.NewUserMessage(prompt)
-	a.store.Add(userMsg)
-
-	// Persist user message to tree session if configured.
-	if a.opts.TreeSession != nil {
-		_, _ = a.opts.TreeSession.AppendFantasyMessage(userMsg)
-	}
-
-	// Build the full message slice for the agent call.
-	var msgs []fantasy.Message
-	if a.opts.TreeSession != nil {
-		msgs = a.opts.TreeSession.GetFantasyMessages()
-	} else {
-		msgs = a.store.GetAll()
-	}
-
-	sentCount := len(msgs)
-
-	// Signal spinner start.
-	sendFn(SpinnerEvent{Show: true})
-
-	result, err := a.opts.Agent.GenerateWithLoopAndStreaming(ctx, msgs,
-		func(toolName, toolArgs string) {
-			sendFn(ToolCallStartedEvent{ToolName: toolName, ToolArgs: toolArgs})
-		},
-		func(toolName string, isStarting bool) {
-			sendFn(ToolExecutionEvent{ToolName: toolName, IsStarting: isStarting})
-		},
-		func(toolName, toolArgs, result string, isError bool) {
-			sendFn(ToolResultEvent{
-				ToolName: toolName,
-				ToolArgs: toolArgs,
-				Result:   result,
-				IsError:  isError,
-			})
-		},
-		func(content string) {
-			sendFn(ResponseCompleteEvent{Content: content})
-		},
-		func(content string) {
-			sendFn(ToolCallContentEvent{Content: content})
-		},
-		func(chunk string) {
-			sendFn(StreamChunkEvent{Content: chunk})
-		},
-	)
-
-	if err != nil {
-		return nil, err
-	}
-
-	// Replace the store with the full updated conversation returned by the agent.
-	a.store.Replace(result.ConversationMessages)
-
-	// Persist new messages to the tree session.
-	if a.opts.TreeSession != nil && len(result.ConversationMessages) > sentCount {
-		for _, msg := range result.ConversationMessages[sentCount:] {
-			_, _ = a.opts.TreeSession.AppendFantasyMessage(msg)
-		}
-	}
-
 	return result, nil
 }

@@ -571,47 +454,6 @@ func (a *App) PrintBlockFromExtension(opts extensions.PrintBlockOpts) {
 	}
 }

-// updateUsage records token usage from a completed agent step into the configured
-// UsageTracker (if any). It uses the actual token counts from the agent result's
-// TotalUsage field when available; otherwise it falls back to text-based estimation.
-//
-// TotalUsage is the sum across all tool-calling steps in a single agent run and
-// is used for session cost tracking. For context window utilization we use the
-// final response's per-call usage (FinalResponse.Usage) which reflects the actual
-// context size at the last API call.
-func (a *App) updateUsage(result *agent.GenerateWithLoopResult, userPrompt string) {
-	if a.opts.UsageTracker == nil || result == nil {
-		return
-	}
-
-	usage := result.TotalUsage
-	inputTokens := int(usage.InputTokens)
-	outputTokens := int(usage.OutputTokens)
-	if inputTokens > 0 && outputTokens > 0 {
-		cacheReadTokens := int(usage.CacheReadTokens)
-		cacheWriteTokens := int(usage.CacheCreationTokens)
-		a.opts.UsageTracker.UpdateUsage(inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens)
-	} else {
-		// Fall back to text-based estimation when the provider omits token counts.
-		responseText := ""
-		if result.FinalResponse != nil {
-			responseText = result.FinalResponse.Content.Text()
-		}
-		a.opts.UsageTracker.EstimateAndUpdateUsage(userPrompt, responseText)
-		return // EstimateAndUpdateUsage already sets context tokens internally
-	}
-
-	// Set context window utilization from the final API call's per-step usage.
-	// FinalResponse.Usage represents the last step only (not the aggregate),
-	// so input+output there reflects the actual context fill level.
-	if result.FinalResponse != nil {
-		fu := result.FinalResponse.Usage
-		if ct := int(fu.InputTokens) + int(fu.OutputTokens); ct > 0 {
-			a.opts.UsageTracker.SetContextTokens(ct)
-		}
-	}
-}
-
 // updateUsageFromTurnResult records token usage from an SDK TurnResult into the
 // configured UsageTracker. This is the SDK-path equivalent of updateUsage.
 func (a *App) updateUsageFromTurnResult(result *kit.TurnResult, userPrompt string) {
@@ -7,57 +7,29 @@ import (
 	"testing"
 	"time"

-	"charm.land/fantasy"
-
-	"github.com/mark3labs/kit/internal/agent"
+	kit "github.com/mark3labs/kit/pkg/kit"
 )

 // --------------------------------------------------------------------------
-// Stub agent
+// Helpers
 // --------------------------------------------------------------------------

-// stubAgent implements AgentRunner for tests. Each call to
-// GenerateWithLoopAndStreaming invokes the next function in the calls slice (in
-// order). If calls is empty it returns a zero-value result.
-//
-// It also supports blocking: if blockCh is non-nil, the stub blocks until a
-// value is sent on the channel (or ctx is cancelled).
-type stubAgent struct {
+// turnResult builds a minimal TurnResult with response text t.
+func turnResult(t string) *kit.TurnResult {
+	return &kit.TurnResult{Response: t}
+}
+
+// stubPromptFunc returns a PromptFunc that invokes successive functions from
+// fns. Each function can block, return errors, etc. If fns is exhausted, a
+// default success result is returned.
+type stubPrompt struct {
 	mu      sync.Mutex
-	calls   []func(ctx context.Context) (*agent.GenerateWithLoopResult, error)
-	callN   int           // index into calls
+	fns     []func(ctx context.Context) (*kit.TurnResult, error)
+	callN   int
 	blockCh chan struct{} // if non-nil, each call blocks until a value arrives
 }

-// newStubAgent creates a stub that returns the supplied results (in order) for
-// successive calls. Pass nil error elements via a helper.
-func newStubAgent(results ...*agent.GenerateWithLoopResult) *stubAgent {
-	s := &stubAgent{}
-	for _, r := range results {
-		s.calls = append(s.calls, func(_ context.Context) (*agent.GenerateWithLoopResult, error) {
-			return r, nil
-		})
-	}
-	return s
-}
-
-// newStubAgentWithFuncs creates a stub whose calls are governed by arbitrary
-// functions (each may inspect ctx, block, return errors, etc.).
-func newStubAgentWithFuncs(fns ...func(ctx context.Context) (*agent.GenerateWithLoopResult, error)) *stubAgent {
-	return &stubAgent{calls: fns}
-}
-
-func (s *stubAgent) GenerateWithLoopAndStreaming(
-	ctx context.Context,
-	_ []fantasy.Message,
-	_ agent.ToolCallHandler,
-	_ agent.ToolExecutionHandler,
-	_ agent.ToolResultHandler,
-	_ agent.ResponseHandler,
-	_ agent.ToolCallContentHandler,
-	_ agent.StreamingResponseHandler,
-) (*agent.GenerateWithLoopResult, error) {
-	// Optional blocking: wait for a signal or ctx cancellation.
+func (s *stubPrompt) fn(ctx context.Context, _ string) (*kit.TurnResult, error) {
 	if s.blockCh != nil {
 		select {
 		case <-s.blockCh:
@@ -71,39 +43,38 @@ func (s *stubAgent) GenerateWithLoopAndStreaming(
 	s.callN++
 	s.mu.Unlock()

-	if idx < len(s.calls) {
-		return s.calls[idx](ctx)
+	if idx < len(s.fns) {
+		return s.fns[idx](ctx)
 	}
-	// Default: return a minimal successful result.
-	return makeResult("default response"), nil
+	return turnResult("default response"), nil
 }

-// CallCount returns how many times the stub was called.
-func (s *stubAgent) CallCount() int {
+func (s *stubPrompt) callCount() int {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	return s.callN
 }

-// --------------------------------------------------------------------------
-// Helpers
-// --------------------------------------------------------------------------
-
-// makeResult builds a minimal GenerateWithLoopResult with response text t.
-func makeResult(t string) *agent.GenerateWithLoopResult {
-	resp := fantasy.Response{
-		Content: fantasy.ResponseContent{fantasy.TextPart{Text: t}},
-	}
-	return &agent.GenerateWithLoopResult{
-		FinalResponse:        &resp,
-		ConversationMessages: []fantasy.Message{},
-	}
+// newTestApp creates an App wired with the given stub prompt function.
+func newTestApp(s *stubPrompt) *App {
+	return New(Options{PromptFunc: s.fn}, nil)
 }

-// newTestApp creates an App wired with the given stubAgent. No session manager,
-// no hooks — minimal viable options for unit testing.
-func newTestApp(a AgentRunner) *App {
-	return New(Options{Agent: a}, nil)
+// newStub creates a stubPrompt that returns the given results in order.
+func newStub(results ...string) *stubPrompt {
+	s := &stubPrompt{}
+	for _, r := range results {
+		s.fns = append(s.fns, func(_ context.Context) (*kit.TurnResult, error) {
+			return turnResult(r), nil
+		})
+	}
+	return s
+}
+
+// newStubWithFuncs creates a stubPrompt whose calls are governed by arbitrary
+// functions (each may inspect ctx, block, return errors, etc.).
+func newStubWithFuncs(fns ...func(ctx context.Context) (*kit.TurnResult, error)) *stubPrompt {
+	return &stubPrompt{fns: fns}
 }

 // waitForCondition polls fn() up to maxWait, returning true if fn returns true
@@ -123,16 +94,15 @@ func waitForCondition(maxWait time.Duration, fn func() bool) bool {
 // Run (single prompt)
 // --------------------------------------------------------------------------

-// TestRun_single verifies that a single call to Run() executes the agent step
+// TestRun_single verifies that a single call to Run() executes the prompt
 // and transitions the app back to idle (busy==false).
 func TestRun_single(t *testing.T) {
-	stub := newStubAgent(makeResult("hello"))
+	stub := newStub("hello")
 	app := newTestApp(stub)
 	defer app.Close()

 	app.Run("hello world")

-	// Wait for the step to complete (app becomes idle).
 	ok := waitForCondition(2*time.Second, func() bool {
 		app.mu.Lock()
 		defer app.mu.Unlock()
@@ -141,40 +111,11 @@ func TestRun_single(t *testing.T) {
 	if !ok {
 		t.Fatal("app did not become idle within 2s after single Run()")
 	}
-	if got := stub.CallCount(); got != 1 {
-		t.Fatalf("expected agent called 1 time, got %d", got)
+	if got := stub.callCount(); got != 1 {
+		t.Fatalf("expected 1 call, got %d", got)
 	}
 }

-// TestRun_addsUserMessageToStore verifies that Run() adds the user message to
-// the MessageStore before calling the agent.
-func TestRun_addsUserMessageToStore(t *testing.T) {
-	var storedMsgs []fantasy.Message
-	stub := newStubAgentWithFuncs(func(_ context.Context) (*agent.GenerateWithLoopResult, error) {
-		// This function is a stub for the GenerateWithLoopAndStreaming call;
-		// the user message is added before the call.
-		return makeResult("ok"), nil
-	})
-	app := newTestApp(stub)
-	defer app.Close()
-
-	// Capture the message store state synchronously by replacing the store.
-	// Instead, use a spy: hook into GenerateWithLoopAndStreaming via stub.
-	_ = storedMsgs // suppress unused warning
-
-	app.Run("my prompt")
-	waitForCondition(2*time.Second, func() bool {
-		app.mu.Lock()
-		defer app.mu.Unlock()
-		return !app.busy
-	})
-
-	// After the step the store should contain at least 1 message (user message).
-	// The agent may Replace the store with an empty ConversationMessages so this
-	// is a >=1 check only *before* replacement. Instead, verify via a spy stub
-	// that the messages slice passed to the agent contains the user message.
-}
-
 // --------------------------------------------------------------------------
 // Run (queued prompts)
 // --------------------------------------------------------------------------
@@ -183,46 +124,38 @@ func TestRun_addsUserMessageToStore(t *testing.T) {
 // enqueues the prompt rather than spawning a second goroutine, and that the
 // queue is drained after the first step completes.
 func TestRun_queued(t *testing.T) {
-	gate := make(chan struct{}) // blocks second call until we release it
+	gate := make(chan struct{})
 	callCount := 0
 	var mu sync.Mutex

-	stub := newStubAgentWithFuncs(
-		// First call: block until gate is released.
-		func(ctx context.Context) (*agent.GenerateWithLoopResult, error) {
+	stub := newStubWithFuncs(
+		func(ctx context.Context) (*kit.TurnResult, error) {
 			mu.Lock()
 			callCount++
 			mu.Unlock()
 			<-gate
-			return makeResult("first"), nil
+			return turnResult("first"), nil
 		},
-		// Second call: instant success.
-		func(_ context.Context) (*agent.GenerateWithLoopResult, error) {
+		func(_ context.Context) (*kit.TurnResult, error) {
 			mu.Lock()
 			callCount++
 			mu.Unlock()
-			return makeResult("second"), nil
+			return turnResult("second"), nil
 		},
 	)
 	app := newTestApp(stub)
 	defer app.Close()

 	app.Run("first prompt")
-
-	// Allow the goroutine to start and enter the gate block.
 	time.Sleep(20 * time.Millisecond)
-
 	app.Run("second prompt")

-	// Second prompt should now be in the queue.
 	if got := app.QueueLength(); got != 1 {
 		t.Fatalf("expected queue length 1, got %d", got)
 	}

-	// Release the gate so the first step can finish.
 	close(gate)

-	// Both steps should eventually complete.
 	ok := waitForCondition(3*time.Second, func() bool {
 		app.mu.Lock()
 		defer app.mu.Unlock()
@@ -236,7 +169,7 @@ func TestRun_queued(t *testing.T) {
 	total := callCount
 	mu.Unlock()
 	if total != 2 {
-		t.Fatalf("expected agent called 2 times, got %d", total)
+		t.Fatalf("expected 2 calls, got %d", total)
 	}
 	if got := app.QueueLength(); got != 0 {
 		t.Fatalf("expected empty queue after drain, got %d", got)
@@ -253,25 +186,25 @@ func TestQueueDrainOrdering(t *testing.T) {
 	var order []string
 	var mu sync.Mutex

-	stub := newStubAgentWithFuncs(
-		func(ctx context.Context) (*agent.GenerateWithLoopResult, error) {
+	stub := newStubWithFuncs(
+		func(ctx context.Context) (*kit.TurnResult, error) {
 			mu.Lock()
 			order = append(order, "first")
 			mu.Unlock()
 			<-gate
-			return makeResult("first"), nil
+			return turnResult("first"), nil
 		},
-		func(_ context.Context) (*agent.GenerateWithLoopResult, error) {
+		func(_ context.Context) (*kit.TurnResult, error) {
 			mu.Lock()
 			order = append(order, "second")
 			mu.Unlock()
-			return makeResult("second"), nil
+			return turnResult("second"), nil
 		},
-		func(_ context.Context) (*agent.GenerateWithLoopResult, error) {
+		func(_ context.Context) (*kit.TurnResult, error) {
 			mu.Lock()
 			order = append(order, "third")
 			mu.Unlock()
-			return makeResult("third"), nil
+			return turnResult("third"), nil
 		},
 	)

@@ -317,10 +250,9 @@ func TestQueueDrainOrdering(t *testing.T) {
 // eventually transitions to idle.
 func TestCancelCurrentStep_cancelsInflightStep(t *testing.T) {
 	started := make(chan struct{}, 1)
-	stub := newStubAgentWithFuncs(
-		func(ctx context.Context) (*agent.GenerateWithLoopResult, error) {
+	stub := newStubWithFuncs(
+		func(ctx context.Context) (*kit.TurnResult, error) {
 			started <- struct{}{}
-			// Block until ctx is cancelled.
 			<-ctx.Done()
 			return nil, ctx.Err()
 		},
@@ -331,7 +263,6 @@ func TestCancelCurrentStep_cancelsInflightStep(t *testing.T) {

 	app.Run("cancel me")

-	// Wait for the step to start.
 	select {
 	case <-started:
 	case <-time.After(2 * time.Second):
@@ -353,9 +284,8 @@ func TestCancelCurrentStep_cancelsInflightStep(t *testing.T) {
 // TestCancelCurrentStep_safeWhenIdle verifies that calling CancelCurrentStep()
 // when no step is in-flight is a no-op and does not panic.
 func TestCancelCurrentStep_safeWhenIdle(t *testing.T) {
-	app := newTestApp(newStubAgent())
+	app := newTestApp(newStub())
 	defer app.Close()
-	// Should not panic.
 	app.CancelCurrentStep()
 }

@@ -367,17 +297,17 @@ func TestCancelCurrentStep_safeWhenIdle(t *testing.T) {
 // enqueued prompts and resets queue length to zero.
 func TestClearQueue_removesQueuedPrompts(t *testing.T) {
 	gate := make(chan struct{})
-	stub := newStubAgentWithFuncs(
-		func(ctx context.Context) (*agent.GenerateWithLoopResult, error) {
+	stub := newStubWithFuncs(
+		func(ctx context.Context) (*kit.TurnResult, error) {
 			<-gate
-			return makeResult("first"), nil
+			return turnResult("first"), nil
 		},
 	)
 	app := newTestApp(stub)
 	defer app.Close()

 	app.Run("first")
-	time.Sleep(20 * time.Millisecond) // let first step start
+	time.Sleep(20 * time.Millisecond)

 	app.Run("second")
 	app.Run("third")
@@ -392,7 +322,6 @@ func TestClearQueue_removesQueuedPrompts(t *testing.T) {
 		t.Fatalf("expected queue length 0 after ClearQueue(), got %d", got)
 	}

-	// Release the first step; since queue is cleared, app should go idle quickly.
 	close(gate)
 	ok := waitForCondition(2*time.Second, func() bool {
 		app.mu.Lock()
@@ -411,19 +340,16 @@ func TestClearQueue_removesQueuedPrompts(t *testing.T) {
 // TestClose_preventsNewRuns verifies that after Close() is called, subsequent
 // Run() calls are silently dropped (no goroutine spawned).
 func TestClose_preventsNewRuns(t *testing.T) {
-	stub := newStubAgent()
+	stub := newStub()
 	app := newTestApp(stub)

 	app.Close()

-	// Should be a no-op (closed flag is set).
 	app.Run("should be dropped")
-
-	// Give it a moment to ensure no goroutine starts.
 	time.Sleep(50 * time.Millisecond)

-	if got := stub.CallCount(); got != 0 {
-		t.Fatalf("expected 0 agent calls after Close(), got %d", got)
+	if got := stub.callCount(); got != 0 {
+		t.Fatalf("expected 0 calls after Close(), got %d", got)
 	}
 }

@@ -433,33 +359,29 @@ func TestClose_waitsForInflightStep(t *testing.T) {
 	gate := make(chan struct{})
 	stepFinished := make(chan struct{}, 1)

-	stub := newStubAgentWithFuncs(
-		func(ctx context.Context) (*agent.GenerateWithLoopResult, error) {
+	stub := newStubWithFuncs(
+		func(_ context.Context) (*kit.TurnResult, error) {
 			<-gate
 			stepFinished <- struct{}{}
-			return makeResult("done"), nil
+			return turnResult("done"), nil
 		},
 	)
 	app := newTestApp(stub)

 	app.Run("in-flight")
-	time.Sleep(20 * time.Millisecond) // let goroutine start
+	time.Sleep(20 * time.Millisecond)

 	closeDone := make(chan struct{})
 	go func() {
-		// This should block until the in-flight step finishes.
-		close(gate) // release the step
+		close(gate)
 		app.Close()
 		close(closeDone)
 	}()

-	// Close() must only return after the step finishes.
 	select {
 	case <-closeDone:
-		// Check that step actually finished before Close() returned.
 		select {
 		case <-stepFinished:
-			// Good: step finished before Close() returned.
 		default:
 			t.Error("Close() returned before step finished")
 		}
@@ -471,17 +393,17 @@ func TestClose_waitsForInflightStep(t *testing.T) {
 // TestClose_idempotent verifies that calling Close() multiple times does not
 // panic or deadlock.
 func TestClose_idempotent(t *testing.T) {
-	app := newTestApp(newStubAgent())
+	app := newTestApp(newStub())
+	app.Close()
 	app.Close()
-	app.Close() // second call must be a no-op
 }

 // TestClose_cancelsInflightStep verifies that Close() cancels the root context,
 // causing a blocking step to unblock via ctx.Done().
 func TestClose_cancelsInflightStep(t *testing.T) {
 	started := make(chan struct{}, 1)
-	stub := newStubAgentWithFuncs(
-		func(ctx context.Context) (*agent.GenerateWithLoopResult, error) {
+	stub := newStubWithFuncs(
+		func(ctx context.Context) (*kit.TurnResult, error) {
 			started <- struct{}{}
 			<-ctx.Done()
 			return nil, ctx.Err()
@@ -504,7 +426,6 @@ func TestClose_cancelsInflightStep(t *testing.T) {

 	select {
 	case <-closeDone:
-		// Good: Close() returned.
 	case <-time.After(3 * time.Second):
 		t.Fatal("Close() timed out after cancelling in-flight step")
 	}
@@ -514,11 +435,11 @@ func TestClose_cancelsInflightStep(t *testing.T) {
 // StepError handling
 // --------------------------------------------------------------------------

-// TestRun_stepError verifies that when the agent returns an error, the app
+// TestRun_stepError verifies that when the prompt returns an error, the app
 // transitions back to idle (not stuck in busy state).
 func TestRun_stepError(t *testing.T) {
-	stub := newStubAgentWithFuncs(
-		func(_ context.Context) (*agent.GenerateWithLoopResult, error) {
+	stub := newStubWithFuncs(
+		func(_ context.Context) (*kit.TurnResult, error) {
 			return nil, errors.New("agent exploded")
 		},
 	)
@@ -543,7 +464,7 @@ func TestRun_stepError(t *testing.T) {

 // TestClearMessages_emptiesStore verifies that ClearMessages() empties the store.
 func TestClearMessages_emptiesStore(t *testing.T) {
-	app := newTestApp(newStubAgent())
+	app := newTestApp(newStub())
 	defer app.Close()

 	app.store.Add(makeTextMsg("user", "hello"))
@@ -565,14 +486,13 @@ func TestClearMessages_emptiesStore(t *testing.T) {
 // TestQueueLength_reflects verifies that QueueLength() accurately reflects
 // the queue depth.
 func TestQueueLength_reflects(t *testing.T) {
-	app := newTestApp(newStubAgent())
+	app := newTestApp(newStub())
 	defer app.Close()

 	if got := app.QueueLength(); got != 0 {
 		t.Fatalf("expected 0, got %d", got)
 	}

-	// Manually push items into the queue (without triggering goroutines).
 	app.mu.Lock()
 	app.queue = append(app.queue, "a", "b", "c")
 	app.mu.Unlock()
@@ -54,12 +54,9 @@ type ResponseCompleteEvent struct {
 }

 // StepCompleteEvent is sent when an agent step finishes successfully.
-// It includes the final response and aggregated usage statistics for the step.
 type StepCompleteEvent struct {
-	// Response is the final fantasy response from the completed step.
-	Response *fantasy.Response
-	// Usage contains aggregated token usage data for the step.
-	Usage fantasy.Usage
+	// ResponseText is the final assistant response text.
+	ResponseText string
 }

 // StepErrorEvent is sent when an agent step fails with an error.
@@ -3,31 +3,11 @@ package app
 import (
 	"context"

-	"charm.land/fantasy"
-
-	"github.com/mark3labs/kit/internal/agent"
 	"github.com/mark3labs/kit/internal/config"
-	"github.com/mark3labs/kit/internal/extensions"
 	"github.com/mark3labs/kit/internal/session"
 	kit "github.com/mark3labs/kit/pkg/kit"
 )

-// AgentRunner is the minimal interface the app layer requires from the agent
-// package. *agent.Agent satisfies this interface. Defining it here allows
-// unit tests to supply stub implementations without spinning up a real LLM.
-type AgentRunner interface {
-	GenerateWithLoopAndStreaming(
-		ctx context.Context,
-		messages []fantasy.Message,
-		onToolCall agent.ToolCallHandler,
-		onToolExecution agent.ToolExecutionHandler,
-		onToolResult agent.ToolResultHandler,
-		onResponse agent.ResponseHandler,
-		onToolCallContent agent.ToolCallContentHandler,
-		onStreamingResponse agent.StreamingResponseHandler,
-	) (*agent.GenerateWithLoopResult, error)
-}
-
 // UsageUpdater is the interface the app layer uses to record token usage after
 // each agent step. It is satisfied by *ui.UsageTracker (which lives in
 // internal/ui) without creating an import cycle — the concrete type is wired
@@ -46,18 +26,17 @@ type UsageUpdater interface {
 	SetContextTokens(tokens int)
 }

-// Options configures an App instance. It mirrors the fields from AgenticLoopConfig
-// in cmd/root.go but is owned by the app layer rather than the CLI.
+// Options configures an App instance.
 type Options struct {
-	// Kit is the SDK instance. When set, executeStep() delegates to
-	// kit.PromptResult() and events flow through SDK subscriptions.
-	// When nil, the legacy AgentRunner path is used (for tests).
+	// Kit is the SDK instance. executeStep() delegates to kit.PromptResult()
+	// and events flow through SDK subscriptions. Required in production;
+	// tests may use PromptFunc instead.
 	Kit *kit.Kit

-	// Agent is the agent used to run the agentic loop.
-	// When Kit is set, this field is ignored (Kit owns the agent).
-	// Required when Kit is nil (e.g. in tests with stub agents).
-	Agent AgentRunner
+	// PromptFunc overrides Kit.PromptResult for testing. When set,
+	// executeStep calls this directly, bypassing SDK event subscription
+	// and usage tracking. Must not be set in production.
+	PromptFunc func(ctx context.Context, prompt string) (*kit.TurnResult, error)

 	// TreeSession is the tree-structured JSONL session manager. When non-nil,
 	// conversation history is persisted as an append-only JSONL tree and tree
@@ -97,10 +76,4 @@ type Options struct {
 	// EstimateAndUpdateUsage as a fallback) using the usage data returned by the
 	// agent. Satisfied by *ui.UsageTracker; wired in cmd/root.go.
 	UsageTracker UsageUpdater
-
-	// Extensions is the optional extension runner. When non-nil, lifecycle
-	// events (Input, BeforeAgentStart, AgentEnd, etc.) are emitted through
-	// it. Tool-level events (ToolCall, ToolResult) are handled by wrapper.go
-	// at the tool layer, not here.
-	Extensions *extensions.Runner
 }
@@ -149,12 +149,8 @@ func (h *CLIEventHandler) Handle(msg tea.Msg) {
 		h.endStream()

 		// Non-streaming fallback: render the full response if not already shown.
-		responseText := ""
-		if e.Response != nil {
-			responseText = e.Response.Content.Text()
-		}
-		if responseText != "" && responseText != h.lastDisplayed {
-			_ = h.cli.DisplayAssistantMessageWithModel(responseText, h.modelName)
+		if e.ResponseText != "" && e.ResponseText != h.lastDisplayed {
+			_ = h.cli.DisplayAssistantMessageWithModel(e.ResponseText, h.modelName)
 		}

 		// Display usage. The app layer has already updated the shared
@@ -5,7 +5,6 @@ import (
 	"testing"

 	tea "charm.land/bubbletea/v2"
-	"charm.land/fantasy"
 	"github.com/mark3labs/kit/internal/app"
 	"github.com/mark3labs/kit/internal/session"
 )
@@ -116,15 +115,6 @@ func sendMsg(m *AppModel, msg tea.Msg) *AppModel {
 	return updated.(*AppModel)
 }

-// makeTestResponse constructs a fantasy.Response with the given text content.
-// Uses fantasy.TextContent (the type that ResponseContent.Text() recognises) rather
-// than TextPart (which is a request-side type).
-func makeTestResponse(text string) *fantasy.Response {
-	return &fantasy.Response{
-		Content: fantasy.ResponseContent{fantasy.TextContent{Text: text}},
-	}
-}
-
 // --------------------------------------------------------------------------
 // State transitions
 // --------------------------------------------------------------------------
@@ -156,10 +146,7 @@ func TestStateTransition_WorkingToInput_StepComplete(t *testing.T) {
 	m, stream, _ := newTestAppModel(ctrl)
 	m.state = stateWorking

-	m = sendMsg(m, app.StepCompleteEvent{
-		Response: makeTestResponse("all done"),
-		Usage:    fantasy.Usage{},
-	})
+	m = sendMsg(m, app.StepCompleteEvent{ResponseText: "all done"})

 	if m.state != stateInput {
 		t.Fatalf("expected stateInput after StepCompleteEvent, got %v", m.state)
@@ -362,9 +349,7 @@ func TestESCCancel_clearedOnStepComplete(t *testing.T) {
 	m.state = stateWorking
 	m.canceling = true

-	m = sendMsg(m, app.StepCompleteEvent{
-		Response: makeTestResponse("done"),
-	})
+	m = sendMsg(m, app.StepCompleteEvent{ResponseText: "done"})

 	if m.canceling {
 		t.Fatal("expected canceling=false after StepCompleteEvent")
@@ -496,10 +481,7 @@ func TestStepComplete_flushesStreamContent(t *testing.T) {
 	// Simulate accumulated streaming text.
 	stream.renderedContent = "rendered assistant text"

-	_, cmd := m.Update(app.StepCompleteEvent{
-		Response: makeTestResponse("final answer"),
-		Usage:    fantasy.Usage{},
-	})
+	_, cmd := m.Update(app.StepCompleteEvent{ResponseText: "final answer"})

 	// A non-nil cmd means flushStreamContent returned tea.Println(...)
 	if cmd == nil {
@@ -514,7 +496,7 @@ func TestStepComplete_noStreamContent_noCmd(t *testing.T) {
 	m, _, _ := newTestAppModel(ctrl)
 	m.state = stateWorking

-	_, cmd := m.Update(app.StepCompleteEvent{Response: nil})
+	_, cmd := m.Update(app.StepCompleteEvent{})

 	if cmd != nil {
 		t.Fatal("expected nil cmd on StepCompleteEvent with no stream content")