kit/pkg/kit/kit.go

package kit

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"charm.land/fantasy"

	"github.com/mark3labs/kit/internal/agent"
	"github.com/mark3labs/kit/internal/config"
	"github.com/mark3labs/kit/internal/core"
	"github.com/mark3labs/kit/internal/extensions"
	"github.com/mark3labs/kit/internal/kitsetup"
	"github.com/mark3labs/kit/internal/message"
	"github.com/mark3labs/kit/internal/models"
	"github.com/mark3labs/kit/internal/session"
	"github.com/mark3labs/kit/internal/skills"
	"github.com/mark3labs/kit/internal/tools"

	"github.com/spf13/viper"
)

// ContextFile represents a project context file (e.g. AGENTS.md) that was
// loaded during initialization and injected into the system prompt.
type ContextFile struct {
	Path    string // Absolute filesystem path.
	Content string // Full file content.
}

// Kit provides programmatic access to kit functionality, allowing
// integration of MCP tools and LLM interactions into Go applications. It manages
// agents, sessions, and model configurations.
type Kit struct {
	agent          *agent.Agent
	session        SessionManager
	modelString    string
	events         *eventBus
	autoCompact    bool
	compactionOpts *CompactionOptions
	contextFiles   []*ContextFile
	skills         []*skills.Skill
	extRunner      *extensions.Runner
	bufferedLogger *tools.BufferedDebugLogger
	authHandler    MCPAuthHandler // OAuth handler for remote MCP servers (may need Close)
	opts           *Options       // stored for reload operations (skills, etc.)
	mcpConfig      *config.Config // loaded MCP/server config, shared with subagents

	// hasCustomSystemPrompt is true when the user explicitly configured a
	// system prompt (via --system-prompt flag, config file, or SDK option).
	// When false, per-model system prompts from modelSettings/customModels
	// can replace the default prompt on model switch.
	hasCustomSystemPrompt bool
	// systemPromptSource holds the raw configured value (file path or text)
	// when hasCustomSystemPrompt is true; empty when the built-in default is in use.
	systemPromptSource string

	// Hook registries — interception layer (see hooks.go).
	beforeToolCall  *hookRegistry[BeforeToolCallHook, BeforeToolCallResult]
	afterToolResult *hookRegistry[AfterToolResultHook, AfterToolResultResult]
	beforeTurn      *hookRegistry[BeforeTurnHook, BeforeTurnResult]
	afterTurn       *hookRegistry[AfterTurnHook, AfterTurnResult]
	contextPrepare  *hookRegistry[ContextPrepareHook, ContextPrepareResult]
	beforeCompact   *hookRegistry[BeforeCompactHook, BeforeCompactResult]
	prepareStep     *hookRegistry[PrepareStepHook, PrepareStepResult]

	// lastInputTokens stores the API-reported input token count from the
	// most recent turn. Used by GetContextStats() to return accurate usage
	// instead of the text-based heuristic which misses system prompts,
	// tool definitions, etc.
	lastInputTokensMu sync.RWMutex
	lastInputTokens   int

	// subagentListeners holds per-tool-call event listeners registered via
	// SubscribeSubagent(). Keyed by toolCallID → *subagentListenerSet.
	subagentListeners sync.Map

	// skillCache holds skills discovered for this Kit instance.
	// Using a per-instance cache avoids cross-contamination when multiple
	// Kit instances exist in the same process.
	skillCache struct {
		skills []*skills.Skill
		mu     sync.RWMutex
	}

	// steerCh is a buffered channel used to inject steering messages into
	// the running agent turn via the LLM library's PrepareStep. Created fresh for
	// each generate() call and set to nil when idle. Protected by steerMu.
	steerMu       sync.Mutex
	steerCh       chan agent.SteerMessage
	leftoverSteer []agent.SteerMessage // unconsumed steer messages from the last turn
}

// Subscribe registers an EventListener that will be called for every lifecycle
// event emitted during Prompt(). Returns an unsubscribe function that removes
// the listener.
func (m *Kit) Subscribe(listener EventListener) func() {
	return m.events.subscribe(listener)
}

// --------------------------------------------------------------------------
// Narrow accessors
// --------------------------------------------------------------------------

// GetToolNames returns the names of all tools available to the agent.
func (m *Kit) GetToolNames() []string {
	agentTools := m.agent.GetTools()
	names := make([]string, len(agentTools))
	for i, t := range agentTools {
		names[i] = t.Info().Name
	}
	return names
}

// GetLoadingMessage returns the agent's startup info message (e.g. GPU
// fallback info), or empty string if none.
func (m *Kit) GetLoadingMessage() string {
	return m.agent.GetLoadingMessage()
}

// GetLoadedServerNames returns the names of successfully loaded MCP servers.
// If MCP servers are still loading in the background, this returns only the
// servers that have completed loading so far.
func (m *Kit) GetLoadedServerNames() []string {
	return m.agent.GetLoadedServerNames()
}

// GetMCPToolCount returns the number of tools loaded from external MCP servers.
// If MCP servers are still loading in the background, this returns the count
// of tools loaded so far (may be 0).
func (m *Kit) GetMCPToolCount() int {
	return m.agent.GetMCPToolCount()
}

// WaitForMCPTools blocks until background MCP tool loading completes.
// Returns nil if no MCP servers are configured or if loading succeeded.
// Returns the loading error if all servers failed. Safe to call multiple times.
func (m *Kit) WaitForMCPTools() error {
	return m.agent.WaitForMCPTools()
}

// MCPToolsReady returns true if MCP tool loading has completed (or was never
// started). This is a non-blocking check useful for UI status display.
func (m *Kit) MCPToolsReady() bool {
	return m.agent.MCPToolsReady()
}

// MCPServerStatus describes the runtime state of a loaded MCP server.
type MCPServerStatus struct {
	// Name is the configured server name.
	Name string
	// ToolCount is the number of tools loaded from this server.
	ToolCount int
}

// AddMCPServer connects to a new MCP server at runtime and makes its tools
// available to the agent immediately. The server's tools are prefixed with the
// server name (e.g. "myserver__tool_name") to avoid naming conflicts, matching
// the behaviour of servers loaded at initialization.
//
// Returns the number of tools loaded from the server.
//
// AddMCPServer is safe to call while the agent is idle. If a turn is in
// progress ([Kit.IsGenerating] returns true), the new tools will be visible
// starting from the next LLM step.
//
// Example:
//
//	n, err := k.AddMCPServer(ctx, "github", kit.MCPServerConfig{
//	    Command: []string{"npx", "-y", "@modelcontextprotocol/server-github"},
//	    Environment: map[string]string{"GITHUB_TOKEN": os.Getenv("GITHUB_TOKEN")},
//	})
func (m *Kit) AddMCPServer(ctx context.Context, name string, cfg MCPServerConfig) (int, error) {
	return m.agent.AddMCPServer(ctx, name, cfg)
}

// AddInProcessMCPServer connects an in-process mcp-go server and makes its
// tools available to the agent immediately. Unlike [AddMCPServer] with a
// command/URL config, this uses mcp-go's in-process transport — no subprocess
// is spawned and no network I/O occurs.
//
// The server must be a *[server.MCPServer] from github.com/mark3labs/mcp-go/server.
// Kit does not take ownership of the server's lifecycle; the caller is responsible
// for any cleanup when the server is no longer needed.
//
// Returns the number of tools loaded from the server.
//
// Example:
//
//	import (
//	    "github.com/mark3labs/mcp-go/mcp"
//	    "github.com/mark3labs/mcp-go/server"
//	)
//
//	mcpSrv := server.NewMCPServer("my-tools", "1.0.0",
//	    server.WithToolCapabilities(true),
//	)
//	mcpSrv.AddTool(mcp.NewTool("search_docs",
//	    mcp.WithDescription("Search documentation"),
//	    mcp.WithString("query", mcp.Required()),
//	), searchHandler)
//
//	n, err := k.AddInProcessMCPServer(ctx, "docs", mcpSrv)
func (m *Kit) AddInProcessMCPServer(ctx context.Context, name string, srv *MCPServer) (int, error) {
	cfg := MCPServerConfig{
		Type:            "inprocess",
		InProcessServer: srv,
	}
	return m.agent.AddMCPServer(ctx, name, cfg)
}

// RemoveMCPServer disconnects an MCP server and removes all its tools from
// the agent. After this call the agent will no longer see or be able to call
// tools from the named server.
//
// RemoveMCPServer is safe to call while the agent is idle. If a turn is in
// progress, the tools are removed at the next LLM step. Any in-flight tool
// calls to the removed server will fail gracefully.
//
// Returns an error if the named server is not currently loaded.
func (m *Kit) RemoveMCPServer(name string) error {
	return m.agent.RemoveMCPServer(name)
}

// ListMCPServers returns the status of all currently loaded MCP servers.
// The returned slice is a snapshot; it is safe to read concurrently.
func (m *Kit) ListMCPServers() []MCPServerStatus {
	names := m.agent.GetLoadedServerNames()
	if len(names) == 0 {
		return nil
	}

	// Build a tool count per server by scanning tool names for the prefix.
	toolNames := m.GetToolNames()
	countByServer := make(map[string]int, len(names))
	for _, tn := range toolNames {
		for _, sn := range names {
			prefix := sn + "__"
			if len(tn) > len(prefix) && tn[:len(prefix)] == prefix {
				countByServer[sn]++
				break
			}
		}
	}

	result := make([]MCPServerStatus, 0, len(names))
	for _, n := range names {
		result = append(result, MCPServerStatus{
			Name:      n,
			ToolCount: countByServer[n],
		})
	}
	return result
}

// GetExtensionToolCount returns the number of tools registered by extensions.
func (m *Kit) GetExtensionToolCount() int {
	return m.agent.GetExtensionToolCount()
}

// --------------------------------------------------------------------------
// MCP Prompts
// --------------------------------------------------------------------------

// MCPPrompt describes a prompt exposed by an MCP server.
type MCPPrompt struct {
	// Name is the prompt name on the MCP server.
	Name string
	// Description is a human-readable description.
	Description string
	// Arguments lists the prompt's expected arguments.
	Arguments []MCPPromptArgument
	// ServerName is the MCP server that provides this prompt.
	ServerName string
}

// MCPPromptArgument describes a single argument for an MCP prompt.
type MCPPromptArgument struct {
	// Name is the argument name.
	Name string
	// Description is a human-readable description.
	Description string
	// Required indicates whether this argument must be provided.
	Required bool
}

// MCPPromptMessage is a single message returned by a prompt expansion.
type MCPPromptMessage struct {
	// Role is "user" or "assistant".
	Role string
	// Content is the text content of the message.
	Content string
	// FileParts contains binary attachments extracted from embedded resources,
	// images, or audio content blocks within the prompt message. Empty for
	// text-only messages.
	FileParts []LLMFilePart
}

// MCPPromptResult is the result of expanding an MCP prompt.
type MCPPromptResult struct {
	// Description is an optional description returned by the server.
	Description string
	// Messages contains the expanded prompt messages.
	Messages []MCPPromptMessage
}

// ListMCPPrompts returns all prompts discovered from connected MCP servers.
// If MCP servers are still loading in the background, this returns only the
// prompts discovered so far. Returns nil if no prompts are available.
func (m *Kit) ListMCPPrompts() []MCPPrompt {
	internal := m.agent.GetMCPPrompts()
	if len(internal) == 0 {
		return nil
	}
	result := make([]MCPPrompt, len(internal))
	for i, p := range internal {
		args := make([]MCPPromptArgument, len(p.Arguments))
		for j, a := range p.Arguments {
			args[j] = MCPPromptArgument{
				Name:        a.Name,
				Description: a.Description,
				Required:    a.Required,
			}
		}
		result[i] = MCPPrompt{
			Name:        p.Name,
			Description: p.Description,
			Arguments:   args,
			ServerName:  p.ServerName,
		}
	}
	return result
}

// GetMCPPrompt retrieves and expands a specific prompt from an MCP server.
// This is a lazy call — the server is contacted each time to get the latest
// prompt content. Arguments are passed as key=value pairs to the server for
// template substitution.
//
// Returns an error if the server is not found or the prompt expansion fails.
func (m *Kit) GetMCPPrompt(ctx context.Context, serverName, promptName string, args map[string]string) (*MCPPromptResult, error) {
	internal, err := m.agent.GetMCPPrompt(ctx, serverName, promptName, args)
	if err != nil {
		return nil, err
	}
	msgs := make([]MCPPromptMessage, len(internal.Messages))
	for i, msg := range internal.Messages {
		var fileParts []LLMFilePart
		for _, fp := range msg.FileParts {
			fileParts = append(fileParts, LLMFilePart{
				Filename:  fp.Filename,
				Data:      fp.Data,
				MediaType: fp.MediaType,
			})
		}
		msgs[i] = MCPPromptMessage{
			Role:      msg.Role,
			Content:   msg.Content,
			FileParts: fileParts,
		}
	}
	return &MCPPromptResult{
		Description: internal.Description,
		Messages:    msgs,
	}, nil
}

// --------------------------------------------------------------------------
// MCP Resources
// --------------------------------------------------------------------------

// MCPResource describes a resource exposed by an MCP server.
type MCPResource struct {
	// URI is the unique resource identifier (e.g. "file:///path" or custom scheme).
	URI string
	// Name is a human-readable name for the resource.
	Name string
	// Description is an optional description of the resource.
	Description string
	// MIMEType is the MIME type of the resource, if known.
	MIMEType string
	// ServerName is the MCP server that provides this resource.
	ServerName string
}

// MCPResourceContent is the result of reading an MCP resource.
type MCPResourceContent struct {
	// URI is the resource URI that was read.
	URI string
	// MIMEType is the MIME type of the content.
	MIMEType string
	// Text is the text content (non-empty for text resources).
	Text string
	// BlobData is the decoded binary content (non-empty for blob resources).
	BlobData []byte
	// IsBlob is true when the content is binary (BlobData is set).
	IsBlob bool
}

// ListMCPResources returns all resources discovered from connected MCP servers.
// If MCP servers are still loading in the background, this returns only the
// resources discovered so far. Returns nil if no resources are available.
func (m *Kit) ListMCPResources() []MCPResource {
	internal := m.agent.GetMCPResources()
	if len(internal) == 0 {
		return nil
	}
	result := make([]MCPResource, len(internal))
	for i, r := range internal {
		result[i] = MCPResource{
			URI:         r.URI,
			Name:        r.Name,
			Description: r.Description,
			MIMEType:    r.MIMEType,
			ServerName:  r.ServerName,
		}
	}
	return result
}

// ReadMCPResource reads a specific resource from an MCP server by URI.
// Returns the resource content (text or binary blob).
func (m *Kit) ReadMCPResource(ctx context.Context, serverName, uri string) (*MCPResourceContent, error) {
	internal, err := m.agent.ReadMCPResource(ctx, serverName, uri)
	if err != nil {
		return nil, err
	}
	return &MCPResourceContent{
		URI:      internal.URI,
		MIMEType: internal.MIMEType,
		Text:     internal.Text,
		BlobData: internal.BlobData,
		IsBlob:   internal.IsBlob,
	}, nil
}

// SubscribeMCPResource subscribes to change notifications for a resource.
// When the resource changes on the server, the resource list is refreshed.
func (m *Kit) SubscribeMCPResource(ctx context.Context, serverName, uri string) error {
	return m.agent.SubscribeMCPResource(ctx, serverName, uri)
}

// UnsubscribeMCPResource cancels change notifications for a resource.
func (m *Kit) UnsubscribeMCPResource(ctx context.Context, serverName, uri string) error {
	return m.agent.UnsubscribeMCPResource(ctx, serverName, uri)
}

// GetBufferedDebugMessages returns any debug messages that were buffered
// during initialization, then clears the buffer. Returns nil if no messages
// were buffered or if buffered logging was not configured.
func (m *Kit) GetBufferedDebugMessages() []string {
	if m.bufferedLogger == nil {
		return nil
	}
	return m.bufferedLogger.GetMessages()
}

// StructuredMessage represents a conversation message with typed content parts
// (tool calls, reasoning, finish markers, etc.) instead of flattened text.
type StructuredMessage struct {
	ID        string
	ParentID  string
	Role      MessageRole
	Parts     []ContentPart
	Model     string
	Provider  string
	Timestamp string // RFC3339 format
}

// GetStructuredMessages returns the conversation messages on the current
// branch with full typed content parts. Unlike GetSessionMessages() which
// flattens all content to a single text string, this preserves tool calls,
// tool results, reasoning blocks, and finish markers as distinct typed parts.
func (m *Kit) GetStructuredMessages() []StructuredMessage {
	if m.session == nil {
		return nil
	}

	branch := m.session.GetCurrentBranch()
	var results []StructuredMessage
	for _, entry := range branch {
		if entry.Type != EntryTypeMessage {
			continue
		}
		results = append(results, StructuredMessage{
			ID:        entry.ID,
			ParentID:  entry.ParentID,
			Role:      MessageRole(entry.Role),
			Parts:     entry.RawParts,
			Model:     entry.Model,
			Provider:  entry.Provider,
			Timestamp: entry.Timestamp.Format("2006-01-02T15:04:05Z07:00"),
		})
	}
	return results
}

// iterBranchMessages iterates over the current branch's MessageEntry items,
// converting each to a message.Message and calling fn to build the result.
// Returns nil if there is no session. Skips entries that are not
// MessageEntry or that fail conversion.
// Deprecated: Use SessionManager.GetCurrentBranch() directly.
func iterBranchMessages[T any](tm *session.TreeManager, fn func(*session.MessageEntry, message.Message) T) []T {
	if tm == nil {
		return nil
	}

	branch := tm.GetBranch("")
	var results []T
	for _, entry := range branch {
		me, ok := entry.(*session.MessageEntry)
		if !ok {
			continue
		}
		msg, err := me.ToMessage()
		if err != nil {
			continue
		}
		results = append(results, fn(me, msg))
	}
	return results
}

// SetModel changes the active model at runtime. The existing tools and
// session are preserved. When the new model has a per-model system prompt
// (from modelSettings or customModels params), it is composed with the
// current AGENTS.md context and skills before being applied.
// The model string should be in "provider/model" format
// (e.g. "anthropic/claude-sonnet-4-5-20250929").
// Returns an error if the model string is invalid or the provider cannot
// be created.
func (m *Kit) SetModel(ctx context.Context, modelString string) error {
	// Validate the model string first.
	if _, _, err := ParseModelString(modelString); err != nil {
		return err
	}

	// Build a provider config from current settings, overriding the model.
	// Load system prompt properly (handles both file paths and inline content).
	systemPrompt, _ := config.LoadSystemPrompt(viper.GetString("system-prompt"))
	thinkingLevel := models.ParseThinkingLevel(viper.GetString("thinking-level"))

	// Validate and adjust thinking level for the target model.
	// Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none".
	if thinkingLevel != models.ThinkingOff {
		parts := strings.SplitN(modelString, "/", 2)
		if len(parts) == 2 {
			modelName := parts[1]
			if !models.IsValidThinkingLevelForModel(thinkingLevel, modelName) {
				fallback := models.SuggestThinkingLevelFallback(thinkingLevel, modelName)
				if fallback != models.ThinkingOff {
					// Adjust the thinking level in viper so the change persists.
					viper.Set("thinking-level", string(fallback))
					thinkingLevel = fallback
				}
			}
		}
	}

	// With message-level caching, thinking and caching can work together.
	// No need to disable caching when thinking is enabled.
	cfg := &models.ProviderConfig{
		ModelString:    modelString,
		SystemPrompt:   systemPrompt,
		ProviderAPIKey: viper.GetString("provider-api-key"),
		ProviderURL:    viper.GetString("provider-url"),
		MaxTokens:      viper.GetInt("max-tokens"),
		TLSSkipVerify:  viper.GetBool("tls-skip-verify"),
		ThinkingLevel:  thinkingLevel,
		DisableCaching: false, // Caching enabled by default, works with thinking
	}

	// Only set generation parameter pointers when the user has explicitly
	// provided a value. This leaves nil pointers for unset params, allowing
	// per-model defaults (modelSettings / customModels params) to apply.
	if viper.IsSet("temperature") {
		v := float32(viper.GetFloat64("temperature"))
		cfg.Temperature = &v
	}
	if viper.IsSet("top-p") {
		v := float32(viper.GetFloat64("top-p"))
		cfg.TopP = &v
	}
	if viper.IsSet("top-k") {
		v := int32(viper.GetInt("top-k"))
		cfg.TopK = &v
	}
	if viper.IsSet("frequency-penalty") {
		v := float32(viper.GetFloat64("frequency-penalty"))
		cfg.FrequencyPenalty = &v
	}
	if viper.IsSet("presence-penalty") {
		v := float32(viper.GetFloat64("presence-penalty"))
		cfg.PresencePenalty = &v
	}

	// When the user hasn't set a custom global system prompt, check for a
	// per-model system prompt. Pre-apply model settings to discover it,
	// then compose with AGENTS.md context and skills if found.
	if !m.hasCustomSystemPrompt {
		// Temporarily clear the system prompt so ApplyModelSettings can
		// detect that no explicit prompt is set and apply the per-model one.
		cfg.SystemPrompt = ""
		models.ApplyModelSettings(cfg, models.LookupModelForSettings(modelString))

		if cfg.SystemPrompt != "" {
			// Per-model system prompt found — compose with runtime context.
			cfg.SystemPrompt = m.composeSystemPrompt(cfg.SystemPrompt)
		} else {
			// No per-model prompt — restore the global composed prompt.
			cfg.SystemPrompt = systemPrompt
		}
	}

	if err := m.agent.SetModel(ctx, cfg); err != nil {
		return err
	}

	m.modelString = modelString

	// Update extension context's Model field.
	if m.extRunner != nil {
		extCtx := m.extRunner.GetContext()
		extCtx.Model = modelString
		m.extRunner.SetContext(extCtx)
	}

	return nil
}

// HasCustomSystemPrompt reports whether the user explicitly configured a system
// prompt via --system-prompt, a config file entry, or SDK Options.SystemPrompt.
// When false, the built-in default (or a per-model override) is in use and can
// be replaced transparently on model switch.
func (m *Kit) HasCustomSystemPrompt() bool {
	return m.hasCustomSystemPrompt
}

// GetSystemPromptSource returns the raw configured value — a file path or
// inline text — when HasCustomSystemPrompt is true; returns an empty string
// when the built-in default prompt is active.
func (m *Kit) GetSystemPromptSource() string {
	return m.systemPromptSource
}

// composeSystemPrompt takes a base system prompt and composes it with the
// current runtime context: AGENTS.md content, skills metadata, and date/cwd.
// This mirrors the composition done during Kit.New() initialization.
func (m *Kit) composeSystemPrompt(basePrompt string) string {
	cwd, _ := os.Getwd()
	pb := skills.NewPromptBuilder(basePrompt)

	// Inject AGENTS.md content as project context.
	for _, cf := range m.contextFiles {
		pb.WithSection("", fmt.Sprintf("Instructions from: %s\n\n%s", cf.Path, cf.Content))
	}

	// Inject skills metadata.
	if len(m.skills) > 0 {
		pb.WithSkills(m.skills)
	}

	// Append current date/time and working directory.
	pb.WithSection("", fmt.Sprintf(
		"Current date and time: %s\nCurrent working directory: %s",
		time.Now().Format("Monday, January 2, 2006, 3:04:05 PM MST"), cwd,
	))

	return pb.Build()
}

// GetAvailableModels returns a list of known models from the registry. Each
// entry includes provider, model ID, context limit, and whether the model
// supports reasoning. This is an advisory list — models not in the registry
// can still be used by specifying their provider/model string.
func (m *Kit) GetAvailableModels() []extensions.ModelInfoEntry {
	registry := models.GetGlobalRegistry()
	var result []extensions.ModelInfoEntry
	for _, providerID := range registry.GetLLMProviders() {
		modelsMap, err := registry.GetModelsForProvider(providerID)
		if err != nil {
			continue
		}
		for modelID, info := range modelsMap {
			result = append(result, extensions.ModelInfoEntry{
				Provider:     providerID,
				ModelID:      modelID,
				Name:         info.Name,
				ContextLimit: info.Limit.Context,
				OutputLimit:  info.Limit.Output,
				Reasoning:    info.Reasoning,
			})
		}
	}
	return result
}

// ReloadExtensions hot-reloads all extensions from disk. Event handlers,
// commands, renderers, shortcuts, and extension-defined tools all update
// immediately.
func (m *Kit) ReloadExtensions() error {
	if m.extRunner == nil {
		return fmt.Errorf("no extensions loaded")
	}

	// Emit shutdown to old extensions.
	if m.extRunner.HasHandlers(extensions.SessionShutdown) {
		_, _ = m.extRunner.Emit(extensions.SessionShutdownEvent{})
	}

	// Re-load from disk.
	extraPaths := viper.GetStringSlice("extension")
	loaded, err := extensions.LoadExtensions(extraPaths)
	if err != nil {
		return fmt.Errorf("reloading extensions: %w", err)
	}

	// Swap extensions on the runner (clears dynamic state).
	m.extRunner.Reload(loaded)

	// Update extension tools on the agent so the LLM sees changes.
	if m.agent != nil {
		extTools := extensions.ExtensionToolsAsLLMTools(m.extRunner.RegisteredTools(), m.extRunner)
		m.agent.SetExtraTools(extTools)
	}

	// Re-set context and emit SessionStart.
	ctx := m.extRunner.GetContext()
	m.extRunner.SetContext(ctx)
	if m.extRunner.HasHandlers(extensions.SessionStart) {
		_, _ = m.extRunner.Emit(extensions.SessionStartEvent{SessionID: ctx.SessionID})
	}

	return nil
}

// ExecuteCompletion makes a standalone LLM completion call for extensions.
// When req.Model is empty the current agent model is reused (no provider
// creation overhead). When req.Model is set a temporary provider is created,
// used, and closed.
func (m *Kit) ExecuteCompletion(ctx context.Context, req extensions.CompleteRequest) (extensions.CompleteResponse, error) {
	var (
		llmModel    fantasy.LanguageModel
		closer      func()
		usedModel   string
		providerOps LLMProviderOptions
	)

	if req.Model == "" {
		// Reuse the active agent's model.
		llmModel = m.agent.GetModel()
		usedModel = m.modelString
		closer = func() {} // nothing to clean up
	} else {
		// Create a temporary provider for the requested model.
		config := &models.ProviderConfig{
			ModelString:   req.Model,
			TLSSkipVerify: viper.GetBool("tls-skip-verify"),
		}
		if req.MaxTokens > 0 {
			config.MaxTokens = req.MaxTokens
		}
		providerResult, err := models.CreateProvider(ctx, config)
		if err != nil {
			return extensions.CompleteResponse{}, fmt.Errorf("create provider for %q: %w", req.Model, err)
		}
		llmModel = providerResult.Model
		usedModel = req.Model
		providerOps = providerResult.ProviderOptions
		closer = func() {
			if providerResult.Closer != nil {
				_ = providerResult.Closer.Close()
			}
		}
	}
	defer closer()

	// Build agent options (no tools — just a simple completion).
	var agentOpts []fantasy.AgentOption
	if req.System != "" {
		agentOpts = append(agentOpts, fantasy.WithSystemPrompt(req.System))
	}
	if req.MaxTokens > 0 {
		agentOpts = append(agentOpts, fantasy.WithMaxOutputTokens(int64(req.MaxTokens)))
	}
	if providerOps != nil {
		agentOpts = append(agentOpts, fantasy.WithProviderOptions(providerOps))
	}

	completionAgent := fantasy.NewAgent(llmModel, agentOpts...)

	// Convert extension SessionMessage history to LLM message slice.
	var messages []fantasy.Message
	for _, sm := range req.Messages {
		messages = append(messages, fantasy.Message{
			Role: fantasy.MessageRole(sm.Role),
			Content: []fantasy.MessagePart{
				fantasy.TextPart{Text: sm.Content},
			},
		})
	}

	// Streaming path.
	if req.OnChunk != nil {
		result, err := completionAgent.Stream(ctx, fantasy.AgentStreamCall{
			Prompt:   req.Prompt,
			Messages: messages,
			OnTextDelta: func(_, text string) error {
				req.OnChunk(text)
				return nil
			},
		})
		if err != nil {
			return extensions.CompleteResponse{}, fmt.Errorf("streaming completion: %w", err)
		}
		return extensions.CompleteResponse{
			Text:         result.Response.Content.Text(),
			InputTokens:  int(result.Response.Usage.InputTokens),
			OutputTokens: int(result.Response.Usage.OutputTokens),
			Model:        usedModel,
		}, nil
	}

	// Non-streaming path.
	result, err := completionAgent.Generate(ctx, fantasy.AgentCall{
		Prompt:   req.Prompt,
		Messages: messages,
	})
	if err != nil {
		return extensions.CompleteResponse{}, fmt.Errorf("completion: %w", err)
	}
	return extensions.CompleteResponse{
		Text:         result.Response.Content.Text(),
		InputTokens:  int(result.Response.Usage.InputTokens),
		OutputTokens: int(result.Response.Usage.OutputTokens),
		Model:        usedModel,
	}, nil
}

// Options configures Kit creation with optional overrides for model,
// prompts, configuration, and behavior settings. All fields are optional
// and will use CLI defaults if not specified.
//
// Global viper state warning:
// Options are applied by [New] via [viper.Set] calls against viper's
// process-global store. This store is shared with every downstream reader
// (e.g. [Kit.SetModel], [Kit.GetThinkingLevel], BuildProviderConfig, and
// any other code path that calls viper.Get*). Two consequences:
//
//  1. Kit instances are NOT isolated from each other within a single
//     process. Values set by the second New() call overwrite the first,
//     and any code that later reads viper will see the most recent Set.
//  2. Fields left at the zero value do NOT clear prior viper state; they
//     simply skip the viper.Set. Callers that need a clean slate between
//     constructions should invoke viper.Reset() (the test suite uses a
//     private resetViper() helper that wraps it) before the next New().
//
// Recommended usage: create one Kit per process, or reset viper between
// constructions. Concurrent calls to New are serialized internally by
// [viperInitMu], but that mutex does not prevent later viper reads (from
// a different Kit) from observing mutated keys.
//
// TODO: refactor New to use a per-instance *viper.Viper (constructed via
// viper.New()) so each Kit owns its own isolated config store and Options
// no longer leak through the global singleton.
type Options struct {
	Model        string // Override model (e.g., "anthropic/claude-sonnet-4-5-20250929")
	SystemPrompt string // Override system prompt
	ConfigFile   string // Override config file path
	MaxSteps     int    // Override max steps (0 = use default)
	Streaming    bool   // Enable streaming (default from config)
	Quiet        bool   // Suppress debug output
	Tools        []Tool // Custom tool set. If empty, AllTools() is used.
	ExtraTools   []Tool // Additional tools added alongside core/MCP/extension tools.

	// Generation parameters. These override the corresponding values from
	// .kit.yml / KIT_* environment variables. Leaving a field at its
	// zero/nil value means "use the configured default", which in turn
	// falls back to per-model defaults (modelSettings / customModels) and
	// finally to a last-resort SDK floor of 8192 for MaxTokens (matching
	// the CLI --max-tokens default; sampling params fall through to
	// provider-level defaults).
	//
	// Pointer types are used for sampling parameters so the SDK can
	// distinguish "explicitly set to 0" from "leave alone".

	// MaxTokens overrides the maximum output tokens per LLM response.
	// 0 = let the precedence chain resolve a value (env → config →
	// per-model → 8192 SDK floor, matching the CLI default). Setting a
	// non-zero value here suppresses automatic right-sizing, matching
	// the CLI's --max-tokens flag semantics. Bump this when generating
	// long outputs (HTML artifacts, large refactors, etc.) to avoid
	// silent truncation mid-tool-call. The cap also applies after
	// model switches via [Kit.SetModel].
	MaxTokens int

	// ThinkingLevel sets the reasoning effort for models that support
	// extended thinking. Valid values: "off", "none", "minimal", "low",
	// "medium", "high". "" = let the precedence chain resolve a level
	// (env → config → per-model → "off"). Use [Kit.SetThinkingLevel]
	// to change at runtime.
	ThinkingLevel string

	// Temperature controls sampling randomness (typically 0.0–2.0).
	// nil = leave provider/per-model default in place. Pointer type
	// so explicit 0.0 (deterministic) is distinguishable from "unset".
	Temperature *float32

	// TopP is the nucleus-sampling cutoff (0.0–1.0).
	// nil = leave provider/per-model default in place.
	TopP *float32

	// TopK limits sampling to the top K tokens.
	// nil = leave provider/per-model default in place.
	TopK *int32

	// FrequencyPenalty discourages repeated tokens (OpenAI-family models).
	// nil = leave provider/per-model default in place.
	FrequencyPenalty *float32

	// PresencePenalty discourages repeating topics (OpenAI-family models).
	// nil = leave provider/per-model default in place.
	PresencePenalty *float32

	// Provider configuration. These override values normally read from
	// .kit.yml or provider-specific environment variables. Useful when
	// loading credentials from a secrets manager, pointing at custom
	// OpenAI-compatible endpoints (LiteLLM, vLLM, Azure OpenAI, internal
	// proxies), or running against self-hosted infrastructure.

	// ProviderAPIKey overrides the API key used to authenticate with the
	// model provider. "" = use the value from config or the
	// provider-specific environment variable.
	ProviderAPIKey string

	// ProviderURL overrides the provider endpoint. "" = use the provider's
	// default URL.
	ProviderURL string

	// TLSSkipVerify disables TLS certificate verification on provider
	// HTTP clients. Only set this for self-signed certificates in
	// development. Once enabled here it cannot be disabled via Options
	// (use the config file or env var to opt back out).
	TLSSkipVerify bool

	// SkipConfig, when true, skips loading .kit.yml configuration files.
	// Viper defaults (setSDKDefaults) and environment variables (KIT_*)
	// are still applied. Use this for fully programmatic configuration.
	SkipConfig bool

	// DisableCoreTools, when true, prevents loading any core tools.
	// Use with Tools or ExtraTools to provide only custom tools.
	// If both DisableCoreTools is true and Tools is empty, the agent
	// will have no tools (useful for simple chat completions).
	DisableCoreTools bool

	// Session configuration
	SessionDir  string // Base directory for session discovery (default: cwd)
	SessionPath string // Open a specific session file by path
	Continue    bool   // Continue the most recent session for SessionDir
	NoSession   bool   // Ephemeral mode — in-memory session, no persistence

	// Skills
	Skills    []string // Explicit skill files/dirs to load (empty = auto-discover)
	SkillsDir string   // Override default project-local skills directory
	NoSkills  bool     // Disable skill loading entirely (auto-discovery and explicit)

	// NoExtensions disables Yaegi extension loading entirely.
	NoExtensions bool

	// NoContextFiles disables automatic loading of project context files
	// (e.g. AGENTS.md) from the working directory.
	NoContextFiles bool

	// MCPConfig provides a pre-loaded MCP configuration. When set,
	// LoadAndValidateConfig is skipped during Kit creation — avoiding
	// viper access entirely. This is set automatically for in-process
	// subagents (inheriting the parent's loaded config) and can be used
	// by SDK consumers who build config programmatically.
	MCPConfig *config.Config

	// InProcessMCPServers registers mcp-go servers that run in the same
	// process. Each key is the server name (used to prefix tool names, e.g.
	// "docs__search"). The value must be a *[server.MCPServer].
	//
	// In-process servers bypass subprocess spawning and network I/O entirely.
	// Kit does not take ownership of the servers — the caller is responsible
	// for any cleanup after [Kit.Close].
	//
	// Example:
	//
	//	mcpSrv := server.NewMCPServer("my-tools", "1.0.0",
	//	    server.WithToolCapabilities(true),
	//	)
	//	mcpSrv.AddTool(mcp.NewTool("search", ...), handler)
	//
	//	host, _ := kit.New(ctx, &kit.Options{
	//	    InProcessMCPServers: map[string]*kit.MCPServer{
	//	        "docs": mcpSrv,
	//	    },
	//	})
	InProcessMCPServers map[string]*MCPServer

	// Compaction
	AutoCompact       bool               // Auto-compact when near context limit
	CompactionOptions *CompactionOptions // Config for auto-compaction (nil = defaults)

	// Debug enables debug logging for the SDK.
	Debug bool

	// MCPAuthHandler handles OAuth authorization for remote MCP servers.
	// When set, remote transports (streamable HTTP, SSE) are configured
	// with OAuth support. If the server returns a 401, the handler is
	// invoked to let the user authorize.
	//
	// If nil, OAuth is disabled: remote MCP servers requiring authorization
	// will fail to connect and the underlying authorization-required error
	// is surfaced to the caller. The SDK deliberately does not construct a
	// default handler — doing so would bind a local TCP port and trigger
	// presentation I/O (browser open, stderr writes) without the consumer
	// opting in, which is wrong for library, daemon, or web-app embedders.
	//
	// CLI consumers: pass [NewCLIMCPAuthHandler] to get the standard
	// "open browser + print status" behavior.
	//
	// Custom UX: implement [MCPAuthHandler] directly, or use
	// [DefaultMCPAuthHandler] and set its OnAuthURL hook to plug in your
	// own presentation (TUI modal, QR code, web redirect, etc.).
	MCPAuthHandler MCPAuthHandler

	// MCPTokenStoreFactory, if non-nil, is called to create a token store for
	// each remote MCP server that requires OAuth. The factory receives the
	// server's URL and returns a [MCPTokenStore] implementation.
	//
	// When nil (default), tokens are persisted to a JSON file at
	// $XDG_CONFIG_HOME/.kit/mcp_tokens.json (or ~/.config/.kit/mcp_tokens.json).
	//
	// Use this to store tokens in a database, encrypt them, keep them
	// in-memory, or write them to a custom file path.
	MCPTokenStoreFactory MCPTokenStoreFactory

	// OnMCPServerLoaded, if non-nil, is called when each MCP server finishes
	// loading during Kit initialization. The callback receives the server name,
	// tool count, and any error. Called from a background goroutine; safe to
	// call app.NotifyMCPServerLoaded() from within the callback to display
	// real-time progress in the TUI.
	OnMCPServerLoaded func(serverName string, toolCount int, err error)

	// MCPTaskMode overrides the per-server [MCPTaskMode] for task-augmented
	// tools/call execution. Keys are MCP server names. Servers not present
	// in the map fall back to the TasksMode field of MCPServerConfig (or
	// MCPTaskModeAuto when that is empty). See the MCP Tasks spec for the
	// underlying semantics:
	// https://modelcontextprotocol.io/specification/2025-11-25/basic/utilities/tasks
	MCPTaskMode map[string]MCPTaskMode

	// MCPTaskTimeout is the maximum wall-clock duration to wait for a
	// task-augmented tool call to reach a terminal state. Independent of
	// any per-call context deadline; whichever fires first wins. Zero
	// means use the default (15 minutes).
	MCPTaskTimeout time.Duration

	// MCPTaskTTL is the TTL hint sent in TaskParams for every
	// task-augmented tools/call. Zero omits the TTL and lets the server
	// pick its own retention policy.
	MCPTaskTTL time.Duration

	// MCPTaskPollInterval is the fallback interval between tasks/get
	// requests when the server does not suggest one. Zero means use the
	// default (1 second).
	MCPTaskPollInterval time.Duration

	// MCPTaskMaxPollInterval caps the polling interval (a server-supplied
	// pollInterval can otherwise grow without bound). Zero means use the
	// default (5 seconds).
	MCPTaskMaxPollInterval time.Duration

	// MCPTaskProgress, if non-nil, is invoked once when a task is accepted
	// and on every status transition observed by the polling loop. The
	// final invocation always carries a terminal status. Implementations
	// must not block; long work should run on a goroutine.
	MCPTaskProgress MCPTaskProgressHandler

	// CLI is optional CLI-specific configuration. SDK users leave this nil.
	CLI *CLIOptions

	// SessionManager allows custom session storage backends.
	// If nil (default), Kit uses the built-in file-based TreeManager.
	// When provided, SessionPath, Continue, and NoSession options are ignored.
	SessionManager SessionManager
}

// CLIOptions holds fields only relevant to the CLI binary. SDK users should
// not need these; they are separated to keep the main Options struct clean.
type CLIOptions struct {
	// MCPConfig is a pre-loaded MCP config. When set, LoadAndValidateConfig
	// is skipped during Kit creation.
	MCPConfig *config.Config
	// ShowSpinner shows a loading spinner for Ollama models.
	ShowSpinner bool
	// SpinnerFunc provides the spinner implementation (nil = no spinner).
	SpinnerFunc SpinnerFunc
	// UseBufferedLogger buffers debug messages for later display.
	UseBufferedLogger bool
	// ProgressReaderFunc wraps an io.Reader with a progress display for
	// long-running operations such as Ollama model pulls. The returned
	// io.ReadCloser must be closed when done. When nil, progress is not
	// displayed.
	ProgressReaderFunc func(io.Reader) io.ReadCloser
}

// InitTreeSession creates or opens a tree session based on the given options.
// Both kit.New() and the CLI use this function so session initialisation
// logic lives in one place.
//
// Behaviour based on Options:
//   - NoSession:   in-memory tree session (no persistence)
//   - Continue:    resume most recent session for SessionDir (or cwd)
//   - SessionPath: open a specific JSONL session file
//   - default:     create a new tree session for SessionDir (or cwd)
func InitTreeSession(opts *Options) (*session.TreeManager, error) {
	if opts == nil {
		opts = &Options{}
	}

	sessionDir := opts.SessionDir
	if sessionDir == "" {
		sessionDir, _ = os.Getwd()
	}

	if opts.NoSession {
		return session.InMemoryTreeSession(sessionDir), nil
	}

	if opts.Continue {
		return session.ContinueRecent(sessionDir)
	}

	if opts.SessionPath != "" {
		return session.OpenTreeSession(opts.SessionPath)
	}

	// Default: create a new tree session for the working directory.
	return session.CreateTreeSession(sessionDir)
}

// viperInitMu serializes viper writes during [New]. Viper's global state
// is not thread-safe, so concurrent calls (e.g. parallel subagent spawns)
// must not overlap the Set/Get window. Note that this mutex only protects
// the construction window — it does not isolate long-lived Kit instances
// from each other. See the "Global viper state warning" on [Options].
var viperInitMu sync.Mutex

// New creates a Kit instance using the same initialization as the CLI.
// It loads configuration, initializes MCP servers, creates the LLM model, and
// sets up the agent for interaction. Returns an error if initialization fails.
//
// Global viper state warning: fields on [Options] are applied by calling
// [viper.Set] on viper's process-global store. As a result, two Kits
// constructed in the same process are NOT isolated: the second New
// overwrites viper keys set by the first, and any downstream reader
// (e.g. [Kit.SetModel], [Kit.GetThinkingLevel]) will observe the most
// recent value. Callers that need multiple independent Kits should call
// viper.Reset() between constructions, or avoid constructing more than
// one Kit per process. Writes during New are serialized by [viperInitMu].
//
// TODO: refactor to use a per-call viper.New() instance so each Kit owns
// its own isolated config store and Options stop leaking through the
// global singleton.
func New(ctx context.Context, opts *Options) (*Kit, error) {
	if opts == nil {
		opts = &Options{}
	}

	// All viper writes (SetSDKDefaults, InitConfig, Set calls, system-prompt
	// composition) happen under viperInitMu. We also call BuildProviderConfig
	// here — it's fast (just reads) — so we can capture the full config
	// snapshot before releasing the lock. The expensive work (MCP loading,
	// provider creation, session init) then runs outside the lock, allowing
	// parallel subagent spawns to proceed concurrently.
	var (
		providerConfig        *models.ProviderConfig
		modelString           string
		cwd                   string
		contextFiles          []*ContextFile
		loadedSkills          []*Skill
		mcpConfig             *config.Config
		debug                 bool
		noExtensions          bool
		maxSteps              int
		streaming             bool
		hasCustomSystemPrompt bool
		systemPromptSource    string
	)

	if err := func() error {
		viperInitMu.Lock()
		defer viperInitMu.Unlock()

		// Set CLI-equivalent defaults for viper. When used as an SDK (without
		// cobra), these defaults are not registered via flag bindings.
		setSDKDefaults()

		// Initialize config (loads config files and env vars).
		// Only initialize if not already done (e.g., by CLI's cobra.OnInitialize).
		// Check if model is already set, which indicates config was loaded.
		// SkipConfig bypasses .kit.yml file loading (viper defaults and env vars still apply).
		if !opts.SkipConfig && viper.GetString("model") == "" {
			if err := InitConfig(opts.ConfigFile, false); err != nil {
				return fmt.Errorf("failed to initialize config: %w", err)
			}
		}

		// Handle CLI debug mode.
		if opts.Debug {
			viper.Set("debug", true)
		}

		// Override viper settings with options.
		if opts.Model != "" {
			viper.Set("model", opts.Model)
		}
		if opts.SystemPrompt != "" {
			viper.Set("system-prompt", opts.SystemPrompt)
		}
		if opts.MaxSteps > 0 {
			viper.Set("max-steps", opts.MaxSteps)
		}
		viper.Set("stream", opts.Streaming)

		// Generation parameter overrides. Each Options field, when set,
		// is pushed into viper here so the existing downstream code
		// (BuildProviderConfig, SetModel, modelSettings lookups) picks
		// it up uniformly. Pointer-typed sampling params use viper.Set
		// only when non-nil so that nil means "leave provider/per-model
		// default in place" (BuildProviderConfig keys off viper.IsSet).
		if opts.MaxTokens > 0 {
			viper.Set("max-tokens", opts.MaxTokens)
		}
		if opts.ThinkingLevel != "" {
			viper.Set("thinking-level", opts.ThinkingLevel)
		}
		if opts.Temperature != nil {
			viper.Set("temperature", *opts.Temperature)
		}
		if opts.TopP != nil {
			viper.Set("top-p", *opts.TopP)
		}
		if opts.TopK != nil {
			viper.Set("top-k", *opts.TopK)
		}
		if opts.FrequencyPenalty != nil {
			viper.Set("frequency-penalty", *opts.FrequencyPenalty)
		}
		if opts.PresencePenalty != nil {
			viper.Set("presence-penalty", *opts.PresencePenalty)
		}

		// Provider overrides. TLSSkipVerify only takes effect when true —
		// callers wanting to force-disable should use the config file or
		// env var instead.
		if opts.ProviderAPIKey != "" {
			viper.Set("provider-api-key", opts.ProviderAPIKey)
		}
		if opts.ProviderURL != "" {
			viper.Set("provider-url", opts.ProviderURL)
		}
		if opts.TLSSkipVerify {
			viper.Set("tls-skip-verify", true)
		}

		// Resolve working directory for context/skill discovery.
		cwd = opts.SessionDir
		if cwd == "" {
			cwd, _ = os.Getwd()
		}

		// Load context files (AGENTS.md) from the project root.
		if !opts.NoContextFiles {
			contextFiles = loadContextFiles(cwd)
		}

		// Load skills — either from explicit paths or via auto-discovery.
		if !opts.NoSkills {
			var err error
			loadedSkills, err = loadSkills(opts)
			if err != nil {
				return fmt.Errorf("failed to load skills: %w", err)
			}
		}

		// Always compose the system prompt with runtime context: base prompt +
		// AGENTS.md context + skills metadata + date/cwd.
		//
		// If the configured model has a per-model system prompt (via
		// modelSettings or customModels params) and the user hasn't
		// explicitly set system-prompt, use the per-model prompt as the
		// base instead of the global default.
		{
			rawPromptInput := viper.GetString("system-prompt")

			// Resolve a file path to its content so PromptBuilder receives the
			// actual prompt text rather than a literal path string. Without this,
			// when system-prompt is set to a file path in the config file or via
			// --system-prompt, the path itself becomes the effective system prompt
			// sent to the model (LoadSystemPrompt only ran later, after viper had
			// been overwritten with the augmented base text).
			basePrompt, _ := config.LoadSystemPrompt(rawPromptInput)
			if basePrompt == "" {
				basePrompt = rawPromptInput
			}

			// Track whether the user explicitly configured a custom system
			// prompt. When they haven't (basePrompt is the built-in default
			// or empty), per-model system prompts can replace it on switch.
			userSetSystemPrompt := basePrompt != "" && basePrompt != defaultSystemPrompt
			hasCustomSystemPrompt = userSetSystemPrompt
			if hasCustomSystemPrompt {
				systemPromptSource = rawPromptInput
			}

			// Check for per-model system prompt override when no explicit
			// global system-prompt was configured by the user.
			if !userSetSystemPrompt {
				modelStr := viper.GetString("model")
				if modelStr != "" {
					if mi := models.LookupModelForSettings(modelStr); mi != nil {
						var perModelParams *models.GenerationParams
						// modelSettings takes priority over custom model params.
						if ms := models.LoadModelSettingsFromConfig(); ms != nil {
							perModelParams = ms[modelStr]
						}
						if perModelParams == nil && mi.Params != nil {
							perModelParams = mi.Params
						}
						if perModelParams != nil && perModelParams.SystemPrompt != "" {
							basePrompt = models.LoadSystemPromptValue(perModelParams.SystemPrompt)
						}
					}
				}
			}

			pb := skills.NewPromptBuilder(basePrompt)

			// Inject AGENTS.md content as project context.
			for _, cf := range contextFiles {
				pb.WithSection("", fmt.Sprintf("Instructions from: %s\n\n%s", cf.Path, cf.Content))
			}

			// Inject skills metadata (name + description + location).
			if len(loadedSkills) > 0 {
				pb.WithSkills(loadedSkills)
			}

			// Append current date/time and working directory.
			pb.WithSection("", fmt.Sprintf(
				"Current date and time: %s\nCurrent working directory: %s",
				time.Now().Format("Monday, January 2, 2006, 3:04:05 PM MST"), cwd,
			))

			viper.Set("system-prompt", pb.Build())
		}

		// Snapshot all viper-derived values now, while the lock is held.
		// BuildProviderConfig is fast (pure reads), so we do it here.
		var pcErr error
		providerConfig, _, pcErr = kitsetup.BuildProviderConfig()
		if pcErr != nil {
			return fmt.Errorf("failed to build provider config: %w", pcErr)
		}

		// SDK last-resort max-tokens floor. When nothing — Options, env,
		// config, nor a per-model default — supplied a value, we land on
		// zero here (viper.GetInt returns 0 for unset keys). Apply the
		// SDK default directly on the struct rather than via viper so
		// viper.IsSet("max-tokens") stays false: downstream right-sizing
		// can still raise this toward the model's known output ceiling,
		// and per-model modelSettings[...].maxTokens can still win.
		if providerConfig.MaxTokens == 0 && opts.MaxTokens == 0 {
			providerConfig.MaxTokens = sdkDefaultMaxTokens
		}
		modelString = viper.GetString("model")
		debug = viper.GetBool("debug")
		noExtensions = opts.NoExtensions || viper.GetBool("no-extensions")
		maxSteps = viper.GetInt("max-steps")
		streaming = viper.GetBool("stream")

		return nil
	}(); err != nil {
		return nil, err
	}
	// ---- viperInitMu released — heavy I/O below runs concurrently ----

	// Load MCP configuration. Use pre-loaded config if provided directly,
	// via CLI options, or load from viper as a last resort.
	if opts.MCPConfig != nil {
		mcpConfig = opts.MCPConfig
	} else if opts.CLI != nil && opts.CLI.MCPConfig != nil {
		mcpConfig = opts.CLI.MCPConfig
	}
	if mcpConfig == nil {
		var err error
		mcpConfig, err = config.LoadAndValidateConfig()
		if err != nil {
			return nil, fmt.Errorf("failed to load MCP config: %w", err)
		}
	}

	// Merge in-process MCP servers from Options into the MCP config.
	// These are programmatically-provided *server.MCPServer instances that
	// bypass subprocess spawning and network I/O.
	if len(opts.InProcessMCPServers) > 0 {
		if mcpConfig.MCPServers == nil {
			mcpConfig.MCPServers = make(map[string]config.MCPServerConfig, len(opts.InProcessMCPServers))
		}
		for name, srv := range opts.InProcessMCPServers {
			mcpConfig.MCPServers[name] = config.MCPServerConfig{
				Type:            "inprocess",
				InProcessServer: srv,
			}
		}
	}

	// Pre-create hook registries so the tool wrapper can reference them.
	// Hooks registered after New() returns are still invoked because the
	// wrapper captures the registries by pointer.
	beforeToolCall := newHookRegistry[BeforeToolCallHook, BeforeToolCallResult]()
	afterToolResult := newHookRegistry[AfterToolResultHook, AfterToolResultResult]()
	beforeTurn := newHookRegistry[BeforeTurnHook, BeforeTurnResult]()
	afterTurn := newHookRegistry[AfterTurnHook, AfterTurnResult]()
	contextPrepare := newHookRegistry[ContextPrepareHook, ContextPrepareResult]()
	beforeCompact := newHookRegistry[BeforeCompactHook, BeforeCompactResult]()
	prepareStep := newHookRegistry[PrepareStepHook, PrepareStepResult]()

	// Build agent setup options, pulling CLI-specific fields when available.
	// Pass the pre-built ProviderConfig and scalar viper snapshots so
	// SetupAgent doesn't need to re-read viper (which would require the lock).
	setupOpts := kitsetup.AgentSetupOptions{
		MCPConfig:         mcpConfig,
		Quiet:             opts.Quiet,
		CoreTools:         opts.Tools,
		DisableCoreTools:  opts.DisableCoreTools,
		ExtraTools:        opts.ExtraTools,
		ToolWrapper:       hookToolWrapper(beforeToolCall, afterToolResult),
		ProviderConfig:    providerConfig,
		Debug:             debug,
		NoExtensions:      noExtensions,
		MaxSteps:          maxSteps,
		StreamingEnabled:  streaming,
		OnMCPServerLoaded: opts.OnMCPServerLoaded,
		MCPTaskConfig: mcpTaskOptions{
			perServer:       opts.MCPTaskMode,
			defaultTTL:      opts.MCPTaskTTL,
			pollInterval:    opts.MCPTaskPollInterval,
			maxPollInterval: opts.MCPTaskMaxPollInterval,
			timeout:         opts.MCPTaskTimeout,
			progress:        opts.MCPTaskProgress,
		}.toToolsConfig(),
	}

	// Set up OAuth handler for remote MCP servers. The SDK does not create
	// a default handler: auto-construction would bind a local TCP port and
	// (historically) shell out to a browser without the consumer asking,
	// which is a surprise for library/daemon/web-app embedders. Consumers
	// that want CLI behavior pass a [CLIMCPAuthHandler] explicitly; other
	// consumers implement [MCPAuthHandler] themselves. If nil, remote MCP
	// servers requiring OAuth will fail to connect with the underlying
	// authorization-required error surfaced to the caller.
	//
	// The SDK MCPAuthHandler interface is structurally identical to
	// tools.MCPAuthHandler, so any implementation satisfies both.
	if opts.MCPAuthHandler != nil {
		setupOpts.AuthHandler = opts.MCPAuthHandler
	}

	// Set up custom token store factory for MCP OAuth tokens.
	// The SDK MCPTokenStoreFactory is structurally identical to
	// tools.TokenStoreFactory, so it can be assigned directly.
	if opts.MCPTokenStoreFactory != nil {
		setupOpts.TokenStoreFactory = tools.TokenStoreFactory(opts.MCPTokenStoreFactory)
	}

	if opts.CLI != nil {
		setupOpts.ShowSpinner = opts.CLI.ShowSpinner
		setupOpts.SpinnerFunc = agent.SpinnerFunc(opts.CLI.SpinnerFunc)
		setupOpts.UseBufferedLogger = opts.CLI.UseBufferedLogger
		if opts.CLI.ProgressReaderFunc != nil {
			providerConfig.ProgressReaderFunc = opts.CLI.ProgressReaderFunc
		}
	}

	// Create agent using shared setup with the hook tool wrapper.
	agentResult, err := kitsetup.SetupAgent(ctx, setupOpts)
	if err != nil {
		return nil, err
	}

	// Initialize session manager.
	var sessionManager SessionManager
	if opts.SessionManager != nil {
		// Use custom session manager provided by user.
		sessionManager = opts.SessionManager
	} else {
		// DEFAULT: Use built-in TreeManager (existing behavior).
		treeSession, err := InitTreeSession(opts)
		if err != nil {
			_ = agentResult.Agent.Close()
			return nil, fmt.Errorf("failed to initialize session: %w", err)
		}
		// Wrap TreeManager in adapter to satisfy SessionManager interface.
		sessionManager = NewTreeManagerAdapter(treeSession)
	}

	k := &Kit{
		agent:                 agentResult.Agent,
		session:               sessionManager,
		modelString:           modelString,
		events:                newEventBus(),
		autoCompact:           opts.AutoCompact,
		compactionOpts:        opts.CompactionOptions,
		contextFiles:          contextFiles,
		skills:                loadedSkills,
		extRunner:             agentResult.ExtRunner,
		bufferedLogger:        agentResult.BufferedLogger,
		authHandler:           setupOpts.AuthHandler,
		opts:                  opts,
		mcpConfig:             mcpConfig,
		hasCustomSystemPrompt: hasCustomSystemPrompt,
		systemPromptSource:    systemPromptSource,
		beforeToolCall:        beforeToolCall,
		afterToolResult:       afterToolResult,
		beforeTurn:            beforeTurn,
		afterTurn:             afterTurn,
		contextPrepare:        contextPrepare,
		beforeCompact:         beforeCompact,
		prepareStep:           prepareStep,
	}

	// Bridge extension events to SDK hooks.
	if agentResult.ExtRunner != nil {
		k.bridgeExtensions(agentResult.ExtRunner)

		// Initialize extension context with minimal defaults. SDK users can call
		// Extensions().SetContext to override with richer implementations (TUI callbacks,
		// prompts, etc.). This ensures extensions never crash on nil function fields.
		k.Extensions().SetContext(extensions.Context{
			CWD:         cwd,
			Model:       k.modelString,
			Interactive: false, // SDK mode defaults to non-interactive
		})
	}

	return k, nil
}

// GetContextFiles returns the context files (e.g. AGENTS.md) loaded during
// initialisation. Returns nil if no context files were found.
func (m *Kit) GetContextFiles() []*ContextFile {
	return m.contextFiles
}

// GetSkills returns the skills loaded during initialisation.
func (m *Kit) GetSkills() []*Skill {
	return m.skills
}

// ---------------------------------------------------------------------------
// Context file loading
// ---------------------------------------------------------------------------

// loadContextFiles discovers and loads project context files (AGENTS.md) from
// the working directory. Returns nil if no context file is found.
func loadContextFiles(cwd string) []*ContextFile {
	path := filepath.Join(cwd, "AGENTS.md")
	data, err := os.ReadFile(path)
	if err != nil {
		return nil
	}
	return []*ContextFile{{
		Path:    path,
		Content: strings.TrimSpace(string(data)),
	}}
}

// ---------------------------------------------------------------------------
// Skill command expansion
// ---------------------------------------------------------------------------

// expandSkillCommand checks whether prompt starts with "/skill:<name>" and, if
// so, re-reads the skill file, strips its YAML frontmatter, wraps the body in
// a <skill> block with baseDir metadata, and appends any trailing user args.
// Returns the original text unchanged when the prefix is absent or the skill is
// not found.
func (m *Kit) expandSkillCommand(prompt string) string {
	if !strings.HasPrefix(prompt, "/skill:") {
		return prompt
	}

	// Parse: /skill:name [args]
	rest := prompt[len("/skill:"):]
	name, args, _ := strings.Cut(rest, " ")
	name = strings.TrimSpace(name)
	if name == "" {
		return prompt
	}

	// Find the skill by name.
	var skillPath string
	for _, s := range m.skills {
		if s.Name == name {
			skillPath = s.Path
			break
		}
	}
	if skillPath == "" {
		return prompt
	}

	// Re-read the file for freshness (user may have edited it since startup).
	loaded, err := skills.LoadSkill(skillPath)
	if err != nil {
		return prompt
	}

	baseDir := filepath.Dir(loaded.Path)
	var buf strings.Builder
	fmt.Fprintf(&buf, "<skill name=%q location=%q>\n", loaded.Name, loaded.Path)
	fmt.Fprintf(&buf, "References are relative to %s.\n\n", baseDir)
	buf.WriteString(loaded.Content)
	buf.WriteString("\n</skill>")

	args = strings.TrimSpace(args)
	if args != "" {
		buf.WriteString("\n\n")
		buf.WriteString(args)
	}

	return buf.String()
}

// ---------------------------------------------------------------------------
// Skills loading
// ---------------------------------------------------------------------------

// loadSkills loads skills based on Options. If explicit paths are provided
// they are loaded directly; otherwise auto-discovery runs.
func loadSkills(opts *Options) ([]*skills.Skill, error) {
	if len(opts.Skills) > 0 {
		return loadExplicitSkills(opts.Skills)
	}

	// Auto-discover from standard directories.
	cwd := opts.SkillsDir
	if cwd == "" {
		cwd = opts.SessionDir
	}
	return skills.LoadSkills(cwd)
}

// loadExplicitSkills loads skills from a list of explicit paths. Each path
// can be a file or a directory.
func loadExplicitSkills(paths []string) ([]*skills.Skill, error) {
	seen := make(map[string]bool)
	var all []*skills.Skill

	for _, p := range paths {
		info, err := os.Stat(p)
		if err != nil {
			return nil, fmt.Errorf("skill path %s: %w", p, err)
		}

		if info.IsDir() {
			dirSkills, err := skills.LoadSkillsFromDir(p)
			if err != nil {
				return nil, err
			}
			for _, s := range dirSkills {
				if !seen[s.Path] {
					seen[s.Path] = true
					all = append(all, s)
				}
			}
		} else {
			abs, _ := filepath.Abs(p)
			if !seen[abs] {
				seen[abs] = true
				s, err := skills.LoadSkill(p)
				if err != nil {
					return nil, err
				}
				all = append(all, s)
			}
		}
	}

	return all, nil
}

// ---------------------------------------------------------------------------
// TurnResult
// ---------------------------------------------------------------------------

// TurnResult contains the full result of a prompt turn, including usage
// statistics and the updated conversation. Use PromptResult() instead of
// Prompt() when you need access to this data.
type TurnResult struct {
	// Response is the assistant's final text response.
	Response string

	// StopReason indicates why the turn ended. Derived from the LLM
	// provider's finish reason: FinishReasonStop, FinishReasonLength (max
	// output tokens reached), FinishReasonToolCalls, FinishReasonContentFilter,
	// FinishReasonError, FinishReasonOther, FinishReasonUnknown.
	StopReason string

	// SessionID is the UUID of the session this turn belongs to.
	SessionID string

	// TotalUsage is the aggregate token usage across all steps in the turn
	// (includes tool-calling loop iterations). Nil if the provider didn't
	// report usage.
	TotalUsage *LLMUsage

	// FinalUsage is the token usage from the last API call only. For context
	// window fill, sum all categories: InputTokens + CacheReadTokens +
	// CacheCreationTokens + OutputTokens. With prompt caching, InputTokens
	// alone understates the context (cached tokens are reported separately).
	// Nil if unavailable.
	FinalUsage *LLMUsage

	// Messages is the full updated conversation after the turn, including
	// any tool call/result messages added during the agent loop.
	// Each message carries role and plain-text content.
	Messages []LLMMessage
}

// ---------------------------------------------------------------------------
// In-process subagent
// ---------------------------------------------------------------------------

// SubagentConfig configures an in-process subagent spawned via Kit.Subagent().
type SubagentConfig struct {
	// Prompt is the task/instruction for the subagent (required).
	Prompt string

	// Model overrides the parent's model (e.g. "anthropic/claude-haiku-3-5-20241022").
	// Empty string uses the parent's current model.
	Model string

	// SystemPrompt provides domain-specific instructions for the subagent.
	// Empty string uses a minimal default prompt.
	SystemPrompt string

	// Tools overrides the tool set. If nil, SubagentTools() is used (all
	// core tools except subagent, preventing infinite recursion).
	Tools []Tool

	// NoSession, when true, uses an in-memory ephemeral session. When false
	// (default), the subagent's session is persisted and can be loaded for
	// replay/inspection.
	NoSession bool

	// Timeout limits execution time. Zero means 5 minute default.
	Timeout time.Duration

	// OnEvent, when set, receives all events from the subagent's event bus.
	// This enables the parent to stream subagent tool calls, text chunks,
	// etc. in real time.
	OnEvent func(Event)
}

// SubagentResult contains the outcome of an in-process subagent execution.
// Errors are returned as the error return value of Subagent(), not in this struct.
type SubagentResult struct {
	// Response is the subagent's final text response.
	Response string
	// SessionID is the subagent's session identifier (for replay).
	SessionID string
	// StopReason is the LLM's finish reason for the subagent's final turn.
	StopReason string
	// Usage contains token usage from the subagent's run.
	Usage *LLMUsage
	// Elapsed is the total execution time.
	Elapsed time.Duration
}

// Subagent spawns an in-process child Kit instance to perform a task. The
// child gets its own session, event bus, and agent loop but shares the
// parent's config (API keys, provider settings) and defaults to the parent's
// model when SubagentConfig.Model is empty.
//
// This is the recommended way to run subagents in the SDK — no subprocess,
// no kit binary dependency, native Go types for results.
func (m *Kit) Subagent(ctx context.Context, cfg SubagentConfig) (*SubagentResult, error) {
	if cfg.Prompt == "" {
		return nil, fmt.Errorf("subagent prompt is required")
	}

	start := time.Now()

	// Default timeout.
	timeout := cfg.Timeout
	if timeout == 0 {
		timeout = 5 * time.Minute
	}

	// Pre-flight check: if the incoming context is already dead, don't
	// waste time attempting init. This catches the case where the parent
	// generation loop's context was cancelled (e.g. user ESC, step cancel)
	// between when the LLM requested the subagent tool and when this code
	// runs. We replace it with a fresh context carrying only the timeout,
	// since the subagent should be independently bounded.
	if ctx.Err() != nil {
		ctx = context.Background()
	}
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	// Resolve model: fall back to parent's model, and inherit the parent's
	// provider when only a bare model name is given (e.g. "claude-haiku"
	// instead of "anthropic/claude-haiku"). This avoids provider guessing.
	model := cfg.Model
	if model == "" {
		model = m.modelString
	} else if !strings.Contains(model, "/") {
		// Bare model name — prepend parent's provider.
		if parts := strings.SplitN(m.modelString, "/", 2); len(parts) == 2 {
			model = parts[0] + "/" + model
		}
	}

	// Early validation: check model format and provider before doing any
	// expensive work (MCP init, system prompt composition, etc.). This
	// gives the calling agent immediate feedback it can act on — e.g.
	// correcting a typo — instead of waiting for a full Kit.New() cycle
	// that silently falls back to the parent model.
	if model != m.modelString {
		if err := models.GetGlobalRegistry().ValidateModelString(model); err != nil {
			return nil, fmt.Errorf("invalid subagent model %q: %w", model, err)
		}
	}

	// Default system prompt.
	systemPrompt := cfg.SystemPrompt
	if systemPrompt == "" {
		systemPrompt = "You are a helpful coding assistant. Complete the task efficiently and thoroughly."
	}

	// Default tools: everything except subagent.
	tools := cfg.Tools
	if tools == nil {
		tools = SubagentTools()
	}

	// Create child Kit instance. Pass the parent's loaded MCP config to
	// avoid re-reading viper (which races with concurrent subagent spawns).
	// Streaming must be explicitly enabled — Options.Streaming defaults to
	// false, and New() unconditionally writes viper.Set("stream", opts.Streaming).
	// Without this, the subagent would (a) pollute viper global state for
	// other concurrent callers and (b) potentially hit provider-level
	// differences (e.g. Anthropic non-streaming timeouts with extended
	// thinking).
	childOpts := &Options{
		Model:        model,
		SystemPrompt: systemPrompt,
		Tools:        tools,
		NoSession:    cfg.NoSession,
		Quiet:        true,
		Streaming:    true,
		MCPConfig:    m.mcpConfig,
	}
	// Propagate the parent's MCP task configuration so a child subagent
	// invoking long-running MCP tools observes the same per-server modes,
	// timeouts, and progress callback as the parent. Without this, child
	// agents would silently fall back to MCPTaskModeAuto with default
	// polling and no progress feedback even when the parent had configured
	// custom values.
	inheritMCPTaskOptions(childOpts, m.opts)
	child, err := New(ctx, childOpts)
	if err != nil {
		return &SubagentResult{Elapsed: time.Since(start)}, fmt.Errorf("failed to create subagent: %w", err)
	}
	defer func() { _ = child.Close() }()

	// Forward events to parent if requested.
	if cfg.OnEvent != nil {
		child.Subscribe(cfg.OnEvent)
	}

	// Run the prompt.
	result, err := child.PromptResult(ctx, cfg.Prompt)
	elapsed := time.Since(start)

	if err != nil {
		return &SubagentResult{Elapsed: elapsed}, err
	}

	subResult := &SubagentResult{
		Response:   result.Response,
		SessionID:  child.GetSessionID(),
		StopReason: result.StopReason,
		Elapsed:    elapsed,
	}
	if result.TotalUsage != nil {
		subResult.Usage = result.TotalUsage
	}

	return subResult, nil
}

// ---------------------------------------------------------------------------
// Shared generation helpers
// ---------------------------------------------------------------------------

// generate calls the agent's generation loop with event-emitting handlers.
// All prompt modes (Prompt, Steer, FollowUp, PromptWithOptions) share this
// single code path so callback wiring is never duplicated.
func (m *Kit) generate(ctx context.Context, messages []fantasy.Message) (*agent.GenerateWithLoopResult, error) {
	// Create a per-turn steer channel and attach it to the context so the
	// agent's PrepareStep can inject steering messages between steps.
	steerCh := make(chan agent.SteerMessage, 16)
	m.steerMu.Lock()
	m.steerCh = steerCh
	m.steerMu.Unlock()
	defer func() {
		// Drain any unconsumed steer messages before nilling the channel.
		// These are stored in leftoverSteer so DrainSteer() can return them.
		var leftover []agent.SteerMessage
		for {
			select {
			case msg := <-steerCh:
				leftover = append(leftover, msg)
			default:
				m.steerMu.Lock()
				m.steerCh = nil
				m.leftoverSteer = leftover
				m.steerMu.Unlock()
				return
			}
		}
	}()
	ctx = agent.ContextWithSteerCh(ctx, steerCh)
	ctx = agent.ContextWithSteerConsumed(ctx, func(count int) {
		m.events.emit(SteerConsumedEvent{Count: count})
	})

	// Inject the in-process subagent spawner into the context so the
	// subagent core tool can create child Kit instances without
	// importing pkg/kit (which would create an import cycle).
	ctx = core.WithSubagentSpawner(ctx, func(
		spawnCtx context.Context, toolCallID, prompt, model, systemPrompt string, timeout time.Duration,
	) (*core.SubagentSpawnResult, error) {
		// Build OnEvent: dispatch to per-tool-call listeners if any are
		// registered via SubscribeSubagent(). Listeners are cleaned up
		// after the subagent completes.
		var onEvent func(Event)
		if listeners := m.getSubagentListenerSet(toolCallID); listeners != nil {
			onEvent = listeners.emit
		}
		result, err := m.Subagent(spawnCtx, SubagentConfig{
			Prompt:       prompt,
			Model:        model,
			SystemPrompt: systemPrompt,
			Timeout:      timeout,
			OnEvent:      onEvent,
		})
		m.cleanupSubagentListeners(toolCallID)
		if result == nil {
			return &core.SubagentSpawnResult{Error: err}, err
		}
		sr := &core.SubagentSpawnResult{
			Response:  result.Response,
			Error:     err,
			SessionID: result.SessionID,
			Elapsed:   result.Elapsed,
		}
		if result.Usage != nil {
			sr.InputTokens = result.Usage.InputTokens
			sr.OutputTokens = result.Usage.OutputTokens
		}
		return sr, err
	})

	return m.agent.GenerateWithCallbacks(ctx, messages, agent.GenerateCallbacks{
		OnToolCall: func(toolCallID, toolName, toolArgs string) {
			m.events.emit(ToolCallEvent{
				ToolCallID: toolCallID, ToolName: toolName, ToolKind: toolKindFor(toolName),
				ToolArgs: toolArgs, ParsedArgs: parseToolArgs(toolArgs),
			})
		},
		OnToolExecution: func(toolCallID, toolName, toolArgs string, isStarting bool) {
			if isStarting {
				m.events.emit(ToolExecutionStartEvent{ToolCallID: toolCallID, ToolName: toolName, ToolKind: toolKindFor(toolName), ToolArgs: toolArgs})
			} else {
				m.events.emit(ToolExecutionEndEvent{ToolCallID: toolCallID, ToolName: toolName, ToolKind: toolKindFor(toolName)})
			}
		},
		OnToolResult: func(toolCallID, toolName, toolArgs, resultText, metadata string, isError bool) {
			evt := ToolResultEvent{
				ToolCallID: toolCallID, ToolName: toolName, ToolKind: toolKindFor(toolName),
				ToolArgs: toolArgs, ParsedArgs: parseToolArgs(toolArgs),
				Result: resultText, IsError: isError,
			}
			if metadata != "" {
				var meta ToolResultMetadata
				if err := json.Unmarshal([]byte(metadata), &meta); err == nil {
					evt.Metadata = &meta
				}
			}
			m.events.emit(evt)
		},
		OnResponse: func(content string) {
			m.events.emit(ResponseEvent{Content: content})
		},
		OnToolCallContent: func(content string) {
			m.events.emit(ToolCallContentEvent{Content: content})
		},
		// <think> tag filtering: models like Qwen/DeepSeek wrap reasoning inside
		// <think>...</think> tags in the regular text stream. We intercept those
		// spans here and re-route them as ReasoningDeltaEvent/ReasoningCompleteEvent
		// so callers always receive clean, tag-free text and structured reasoning.
		OnStreamingResponse: func() func(chunk string) {
			const (
				thinkOpen  = "<think>"
				thinkClose = "</think>"
			)
			var inThinkTag bool
			return func(chunk string) {
				remaining := chunk
				for remaining != "" {
					if inThinkTag {
						i := strings.Index(remaining, thinkClose)
						if i == -1 {
							m.events.emit(ReasoningDeltaEvent{Delta: remaining})
							return
						}
						if i > 0 {
							m.events.emit(ReasoningDeltaEvent{Delta: remaining[:i]})
						}
						inThinkTag = false
						m.events.emit(ReasoningCompleteEvent{})
						remaining = remaining[i+len(thinkClose):]
					} else {
						i := strings.Index(remaining, thinkOpen)
						if i == -1 {
							m.events.emit(MessageUpdateEvent{Chunk: remaining})
							return
						}
						if i > 0 {
							m.events.emit(MessageUpdateEvent{Chunk: remaining[:i]})
						}
						inThinkTag = true
						remaining = remaining[i+len(thinkOpen):]
					}
				}
			}
		}(),
		OnReasoningDelta: func(delta string) {
			m.events.emit(ReasoningDeltaEvent{Delta: delta})
		},
		OnReasoningComplete: func() {
			m.events.emit(ReasoningCompleteEvent{})
		},
		OnToolOutput: func(toolCallID, toolName, chunk string, isStderr bool) {
			m.events.emit(ToolOutputEvent{
				ToolCallID: toolCallID,
				ToolName:   toolName,
				Chunk:      chunk,
				IsStderr:   isStderr,
			})
		},
		// Persist step messages incrementally so that progress survives
		// crashes and long-running turns don't lose work.
		OnStepMessages: func(stepMessages []fantasy.Message) {
			for _, msg := range stepMessages {
				_, _ = m.session.AppendMessage(msg)
			}
		},
		OnStepUsage: func(inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens int64) {
			if viper.GetBool("debug") {
				log.Printf("DEBUG Kit.generate emitting StepUsageEvent: input=%d output=%d cacheRead=%d cacheCreate=%d",
					inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens,
				)
			}
			m.events.emit(StepUsageEvent{
				InputTokens:      uint64(inputTokens),
				OutputTokens:     uint64(outputTokens),
				CacheReadTokens:  uint64(cacheReadTokens),
				CacheWriteTokens: uint64(cacheCreationTokens),
			})
		},
		// Password prompt handler for sudo commands
		OnPasswordPrompt: func(prompt string) (string, bool) {
			responseCh := make(chan PasswordPromptResponse, 1)
			m.events.emit(PasswordPromptEvent{
				Prompt:     prompt,
				ResponseCh: responseCh,
			})
			resp := <-responseCh
			return resp.Password, resp.Cancelled
		},
		// Tool call argument streaming
		OnToolCallStart: func(toolCallID, toolName string) {
			m.events.emit(ToolCallStartEvent{
				ToolCallID: toolCallID,
				ToolName:   toolName,
				ToolKind:   toolKindFor(toolName),
			})
		},
		OnToolCallDelta: func(toolCallID, delta string) {
			m.events.emit(ToolCallDeltaEvent{
				ToolCallID: toolCallID,
				Delta:      delta,
			})
		},
		OnToolCallEnd: func(toolCallID string) {
			m.events.emit(ToolCallEndEvent{
				ToolCallID: toolCallID,
			})
		},

		// New callbacks for previously unwired Fantasy lifecycle events.
		OnStepStart: func(stepNumber int) {
			m.events.emit(StepStartEvent{StepNumber: stepNumber})
		},
		OnStepFinish: func(stepNumber int, hasToolCalls bool, finishReason string, usage fantasy.Usage) {
			m.events.emit(StepFinishEvent{
				StepNumber:   stepNumber,
				HasToolCalls: hasToolCalls,
				FinishReason: finishReason,
				Usage:        usage,
			})
		},
		OnTextStart: func(id string) {
			m.events.emit(TextStartEvent{ID: id})
		},
		OnTextEnd: func(id string) {
			m.events.emit(TextEndEvent{ID: id})
		},
		OnReasoningStart: func(id string) {
			m.events.emit(ReasoningStartEvent{ID: id})
		},
		OnWarnings: func(warnings []string) {
			m.events.emit(WarningsEvent{Warnings: warnings})
		},
		OnSource: func(sourceType, id, url, title string) {
			m.events.emit(SourceEvent{
				SourceType: sourceType,
				ID:         id,
				URL:        url,
				Title:      title,
			})
		},
		OnStreamFinish: func(usage fantasy.Usage, finishReason string) {
			m.events.emit(StreamFinishEvent{
				Usage:        usage,
				FinishReason: finishReason,
			})
		},
		OnError: func(err error) {
			m.events.emit(ErrorEvent{Error: err})
		},
		OnRetry: func(attempt int, err error) {
			m.events.emit(RetryEvent{Attempt: attempt, Error: err})
		},
		// PrepareStep hook — compose with steering (handled in agent layer)
		// and then run SDK consumer hooks.
		OnPrepareStep: func() agent.PrepareStepHandler {
			if !m.prepareStep.hasHooks() {
				return nil
			}
			return func(stepNumber int, messages []fantasy.Message) []fantasy.Message {
				hookResult := m.prepareStep.run(PrepareStepHook{
					StepNumber: stepNumber,
					Messages:   messages,
				})
				if hookResult != nil && hookResult.Messages != nil {
					return hookResult.Messages
				}
				return nil
			}
		}(),
	})
}

// runTurn is the shared lifecycle for every prompt mode:
//  1. Run BeforeTurn hooks (can modify prompt, inject messages).
//  2. Persist pre-generation messages to the tree session.
//  3. Build context from the tree (walks leaf-to-root for current branch).
//  4. Emit turn/message start events.
//  5. Run generation (messages are persisted incrementally per step).
//  6. Persist any remaining messages not covered by incremental persistence.
//  7. Emit turn/message end events.
//  8. Run AfterTurn hooks.
//
// During generation, each completed step's messages are persisted immediately
// via the onStepMessages callback. Tool calls are always persisted as
// call/response pairs (assistant + tool messages together). Reasoning and
// text-only assistant messages are persisted as soon as their step completes.
// This ensures long-running turns don't lose progress on crash or cancellation.
//
// promptLabel is the human-readable label emitted in TurnStartEvent.Prompt.
// prompt is the raw user text passed to BeforeTurn hooks.
func (m *Kit) runTurn(ctx context.Context, promptLabel string, prompt string, preMessages []fantasy.Message) (*TurnResult, error) {
	// Expand /skill:name commands — reads the skill file, wraps it in a
	// <skill> block, and appends any trailing user args.
	if expanded := m.expandSkillCommand(prompt); expanded != prompt {
		prompt = expanded
		// Replace the last user message in preMessages with the expanded text,
		// preserving any file parts (e.g. clipboard images).
		for i := len(preMessages) - 1; i >= 0; i-- {
			if preMessages[i].Role == fantasy.MessageRoleUser {
				files := extractFileParts(preMessages[i])
				preMessages[i] = fantasy.NewUserMessage(expanded, files...)
				break
			}
		}
	}

	// Run BeforeTurn hooks — can modify the prompt, inject system/context messages.
	if hookResult := m.beforeTurn.run(BeforeTurnHook{Prompt: prompt}); hookResult != nil {
		// Override prompt text in the last user message, preserving
		// any file parts (e.g. clipboard images).
		if hookResult.Prompt != nil {
			for i := len(preMessages) - 1; i >= 0; i-- {
				if preMessages[i].Role == fantasy.MessageRoleUser {
					files := extractFileParts(preMessages[i])
					preMessages[i] = fantasy.NewUserMessage(*hookResult.Prompt, files...)
					break
				}
			}
		}
		// Inject messages before the original preMessages.
		var injected []fantasy.Message
		if hookResult.SystemPrompt != nil {
			injected = append(injected, fantasy.NewSystemMessage(*hookResult.SystemPrompt))
		}
		if hookResult.InjectText != nil {
			injected = append(injected, fantasy.NewUserMessage(*hookResult.InjectText))
		}
		if len(injected) > 0 {
			preMessages = append(injected, preMessages...)
		}
	}

	// Persist pre-generation messages to session.
	for _, msg := range preMessages {
		_, _ = m.session.AppendMessage(msg)
	}

	// Auto-compact if enabled and conversation is near the context limit.
	if m.autoCompact && m.ShouldCompact() {
		_, _ = m.compactInternal(ctx, m.compactionOpts, "", true) // best-effort, automatic
	}

	// Build context from the session so only the current branch is sent.
	messages, _, _ := m.session.BuildContext()

	// Run ContextPrepare hooks — extensions can filter, reorder, or inject messages.
	if hookResult := m.contextPrepare.run(ContextPrepareHook{Messages: messages}); hookResult != nil && hookResult.Messages != nil {
		messages = hookResult.Messages
	}

	sentCount := len(messages)

	m.events.emit(TurnStartEvent{Prompt: promptLabel})
	m.events.emit(MessageStartEvent{})

	result, err := m.generate(ctx, messages)
	if err != nil {
		// Persist any messages from completed steps that were NOT already
		// persisted incrementally by the onStepMessages callback. The agent
		// layer only includes fully-paired tool_use + tool_result messages
		// in completedStepMessages, so there are no orphaned entries that
		// would break subsequent API requests.
		if result != nil {
			newMessages := result.ConversationMessages[sentCount:]
			alreadyPersisted := result.PersistedMessageCount
			if alreadyPersisted < len(newMessages) {
				for _, msg := range newMessages[alreadyPersisted:] {
					_, _ = m.session.AppendMessage(msg)
				}
			}
		}
		m.events.emit(TurnEndEvent{Error: err})
		// Run AfterTurn hooks even on error.
		m.afterTurn.run(AfterTurnHook{Error: err})
		return nil, err
	}

	responseText := result.FinalResponse.Content.Text()

	// Persist any new messages that were NOT already persisted incrementally
	// by the onStepMessages callback during generation. This handles the
	// non-streaming path (where onStepMessages is not called) and any edge
	// cases where the final response messages weren't covered by step callbacks.
	if len(result.ConversationMessages) > sentCount {
		newMessages := result.ConversationMessages[sentCount:]
		alreadyPersisted := result.PersistedMessageCount
		if alreadyPersisted < len(newMessages) {
			for _, msg := range newMessages[alreadyPersisted:] {
				_, _ = m.session.AppendMessage(msg)
			}
		}
	}

	// Store the API-reported token count so GetContextStats() matches the
	// built-in status bar. The context window is filled by all token
	// categories: non-cached input, cache reads, cache writes, and output.
	// With Anthropic prompt caching, InputTokens can be near-zero while
	// CacheReadTokens/CacheCreationTokens hold the bulk of the context.
	if result.FinalResponse != nil {
		u := result.FinalResponse.Usage
		m.lastInputTokensMu.Lock()
		m.lastInputTokens = int(u.InputTokens) + int(u.CacheReadTokens) + int(u.CacheCreationTokens) + int(u.OutputTokens)
		m.lastInputTokensMu.Unlock()
	}

	stopReason := result.StopReason

	m.events.emit(MessageEndEvent{Content: responseText})
	m.events.emit(TurnEndEvent{Response: responseText, StopReason: stopReason})

	// Run AfterTurn hooks.
	m.afterTurn.run(AfterTurnHook{Response: responseText})

	// Build TurnResult with usage stats.
	turnResult := &TurnResult{
		Response:   responseText,
		StopReason: stopReason,
		SessionID:  m.GetSessionID(),
		Messages:   result.ConversationMessages,
	}
	totalUsage := result.TotalUsage
	turnResult.TotalUsage = &totalUsage
	if result.FinalResponse != nil {
		finalUsage := result.FinalResponse.Usage
		turnResult.FinalUsage = &finalUsage
	}

	return turnResult, nil
}

// ---------------------------------------------------------------------------
// Prompt modes
// ---------------------------------------------------------------------------

// Prompt sends a message to the agent and returns the response. The agent may
// use tools as needed to generate the response. The conversation history is
// automatically maintained in the tree session. Lifecycle events are emitted
// to all registered subscribers. Returns an error if generation fails.
func (m *Kit) Prompt(ctx context.Context, message string) (string, error) {
	result, err := m.runTurn(ctx, message, message, []fantasy.Message{
		fantasy.NewUserMessage(message),
	})
	if err != nil {
		return "", err
	}
	return result.Response, nil
}

// Steer injects a system-level instruction and triggers a new agent turn.
// Use Steer to dynamically adjust agent behavior mid-conversation without a
// visible user message — for example, changing tone, focus, or constraints.
//
// Under the hood, Steer appends a system message (the instruction) followed by
// a synthetic user message so the agent acknowledges and follows the directive.
// Both messages are persisted to the session.
func (m *Kit) Steer(ctx context.Context, instruction string) (string, error) {
	result, err := m.runTurn(ctx, "[steer] "+instruction, instruction, []fantasy.Message{
		fantasy.NewSystemMessage(instruction),
		fantasy.NewUserMessage("Please acknowledge and follow the above instruction."),
	})
	if err != nil {
		return "", err
	}
	return result.Response, nil
}

// FollowUp continues the conversation without explicit new user input.
// If text is empty, "Continue." is used as the prompt. Use FollowUp when the
// agent's previous response was truncated or you want the agent to elaborate.
//
// Returns an error if there are no previous messages in the session.
func (m *Kit) FollowUp(ctx context.Context, text string) (string, error) {
	// Verify there is conversation history to follow up on.
	if len(m.session.GetMessages()) == 0 {
		return "", fmt.Errorf("cannot follow up: no previous messages")
	}

	if text == "" {
		text = "Continue."
	}

	result, err := m.runTurn(ctx, "[follow-up]", text, []fantasy.Message{
		fantasy.NewUserMessage(text),
	})
	if err != nil {
		return "", err
	}
	return result.Response, nil
}

// InjectSteer sends a steering message into the currently active agent turn.
// The message will be injected as a user message between steps (after the
// current tool execution finishes, before the next LLM call). If no turn is
// active the message is silently dropped — callers should check IsGenerating()
// or use Prompt()/Steer() for idle-state messaging.
//
// InjectSteer is safe to call from any goroutine. Multiple calls queue
// messages in order; all pending steer messages are drained and injected
// together at the next step boundary.
//
// This is the preferred way to redirect an agent mid-turn without cancelling
// in-progress tool execution.
func (m *Kit) InjectSteer(message string) {
	m.InjectSteerWithFiles(message, nil)
}

// InjectSteerWithFiles sends a steering message with optional file attachments
// (e.g. pasted images) into the currently active agent turn. Behaves like
// InjectSteer but includes file parts in the injected user message.
func (m *Kit) InjectSteerWithFiles(message string, files []LLMFilePart) {
	m.steerMu.Lock()
	ch := m.steerCh
	m.steerMu.Unlock()
	if ch == nil {
		return
	}
	select {
	case ch <- agent.SteerMessage{Text: message, Files: files}:
	default:
		// Channel full — extremely unlikely with buffer of 16, but don't block.
	}
}

// IsGenerating returns true if an agent turn is currently in progress.
// Use this to decide between InjectSteer (mid-turn) and Prompt (new turn).
func (m *Kit) IsGenerating() bool {
	m.steerMu.Lock()
	defer m.steerMu.Unlock()
	return m.steerCh != nil
}

// DrainSteer removes and returns all unconsumed steer messages. Called after
// a turn completes so the app layer can process any steer messages that
// arrived after the last PrepareStep fired (e.g. during a text-only response
// with no tool calls, or after the agent finished its last step).
func (m *Kit) DrainSteer() []agent.SteerMessage {
	m.steerMu.Lock()
	defer m.steerMu.Unlock()

	// First check leftover messages saved when generate() returned.
	if len(m.leftoverSteer) > 0 {
		msgs := m.leftoverSteer
		m.leftoverSteer = nil
		return msgs
	}

	// If a turn is still active, drain from the live channel.
	if m.steerCh != nil {
		var msgs []agent.SteerMessage
		for {
			select {
			case msg := <-m.steerCh:
				msgs = append(msgs, msg)
			default:
				return msgs
			}
		}
	}

	return nil
}

// PromptOptions configures a single PromptWithOptions call.
type PromptOptions struct {
	// SystemMessage is prepended as a system message before the user prompt.
	// Use it to inject per-call instructions or context without permanently
	// modifying the agent's system prompt.
	SystemMessage string
}

// PromptWithOptions sends a message with per-call configuration. It behaves
// like Prompt but allows injecting an additional system message before the
// user prompt. Both messages are persisted to the session.
func (m *Kit) PromptWithOptions(ctx context.Context, msg string, opts PromptOptions) (string, error) {
	var preMessages []fantasy.Message
	if opts.SystemMessage != "" {
		preMessages = append(preMessages, fantasy.NewSystemMessage(opts.SystemMessage))
	}
	preMessages = append(preMessages, fantasy.NewUserMessage(msg))

	result, err := m.runTurn(ctx, msg, msg, preMessages)
	if err != nil {
		return "", err
	}
	return result.Response, nil
}

// PromptResult sends a message and returns the full turn result including
// usage statistics and conversation messages. Use this instead of Prompt()
// when you need more than just the response text.
func (m *Kit) PromptResult(ctx context.Context, message string) (*TurnResult, error) {
	return m.runTurn(ctx, message, message, []fantasy.Message{
		fantasy.NewUserMessage(message),
	})
}

// PromptResultWithFiles sends a multimodal message (text + images) and returns
// the full turn result. The files parameter carries binary file data (e.g.
// clipboard images) that are included alongside the text in the user message.
func (m *Kit) PromptResultWithFiles(ctx context.Context, message string, files []LLMFilePart) (*TurnResult, error) {
	return m.runTurn(ctx, message, message, []fantasy.Message{
		fantasy.NewUserMessage(message, files...),
	})
}

// PromptResultWithMessages submits multiple user messages in a single turn.
// All messages are persisted to the session and sent to the agent together.
// The agent will respond once to the combined context of all messages.
// Returns the full turn result including usage statistics and conversation messages.
func (m *Kit) PromptResultWithMessages(ctx context.Context, messages []string) (*TurnResult, error) {
	if len(messages) == 0 {
		return nil, fmt.Errorf("no messages provided")
	}

	// Build prompt label from all messages
	promptLabel := strings.Join(messages, " | ")
	if len(promptLabel) > 100 {
		promptLabel = promptLabel[:100] + "..."
	}

	// Build LLM messages from all strings
	var preMessages []fantasy.Message
	for _, msg := range messages {
		preMessages = append(preMessages, fantasy.NewUserMessage(msg))
	}

	return m.runTurn(ctx, promptLabel, messages[len(messages)-1], preMessages)
}

// ClearSession resets the session's leaf pointer to the root, starting
// a fresh conversation branch.
func (m *Kit) ClearSession() {
	if m.session != nil {
		_ = m.session.Branch("")
	}
}

// GetModelString returns the current model string identifier (e.g.,
// "anthropic/claude-sonnet-4-5-20250929" or "openai/gpt-4") being used by the agent.
func (m *Kit) GetModelString() string {
	return m.modelString
}

// GetModelInfo returns detailed information about the current model
// (capabilities, pricing, limits). Returns nil if the model is not in the
// registry — this is expected for new models or custom fine-tunes.
func (m *Kit) GetModelInfo() *ModelInfo {
	provider, modelID, err := ParseModelString(m.modelString)
	if err != nil {
		return nil
	}
	return LookupModel(provider, modelID)
}

// IsReasoningModel returns true if the current model supports extended thinking / reasoning.
func (m *Kit) IsReasoningModel() bool {
	info := m.GetModelInfo()
	return info != nil && info.Reasoning
}

// GetThinkingLevel returns the current thinking level.
func (m *Kit) GetThinkingLevel() string {
	return viper.GetString("thinking-level")
}

// SetThinkingLevel changes the thinking level and recreates the agent with
// the new thinking budget. Returns an error if provider recreation fails.
//
// With message-level caching, both thinking and caching work together.
// Caching reduces costs by 60-90% for repeated context.
func (m *Kit) SetThinkingLevel(ctx context.Context, level string) error {
	viper.Set("thinking-level", level)
	// Recreate agent with new thinking config by re-running SetModel
	// with the same model string. SetModel rebuilds the provider and
	// passes the updated viper config (including thinking-level).
	return m.SetModel(ctx, m.modelString)
}

// GetTools returns all tools available to the agent (core + MCP + extensions).
func (m *Kit) GetTools() []Tool {
	return m.agent.GetTools()
}

// MaxTokens returns the effective max output tokens currently configured for
// the agent. This is the value actually sent to the LLM provider on each
// request, after CLI/env/config resolution, per-model overrides, model-aware
// right-sizing, and any Anthropic thinking-budget adjustments.
//
// Returns 0 when the active provider suppresses the max_output_tokens
// parameter (e.g. OpenAI Codex OAuth) or when no model is configured yet.
// A non-zero value is the number that will cause a FinishReasonLength
// truncation if the model tries to generate beyond it.
func (m *Kit) MaxTokens() int {
	if m.agent == nil {
		return 0
	}
	return m.agent.GetMaxTokens()
}

// MaxOutputLimit returns the catalog-reported output ceiling for the current
// model in tokens, or 0 when the model isn't in the registry (custom models,
// new releases, Ollama, etc.). Pair with MaxTokens() to detect when the agent
// is configured well below what the model supports and surface a hint to the
// user.
func (m *Kit) MaxOutputLimit() int {
	info := m.GetModelInfo()
	if info == nil {
		return 0
	}
	return info.Limit.Output
}

// extractFileParts returns all FilePart entries from a message's Content.
// Used to preserve image attachments when replacing user message text.
func extractFileParts(msg fantasy.Message) []fantasy.FilePart {
	var files []fantasy.FilePart
	for _, part := range msg.Content {
		if fp, ok := part.(fantasy.FilePart); ok {
			files = append(files, fp)
		}
	}
	return files
}

// Close cleans up resources including MCP server connections, model resources,
// and the tree session file handle. Should be called when the Kit instance is
// no longer needed. Returns an error if cleanup fails.
func (m *Kit) Close() error {
	// Emit SessionShutdown for extensions.
	if m.extRunner != nil && m.extRunner.HasHandlers(extensions.SessionShutdown) {
		_, _ = m.extRunner.Emit(extensions.SessionShutdownEvent{})
	}
	if m.session != nil {
		_ = m.session.Close()
	}
	// Release the OAuth callback port if we own the handler.
	if closer, ok := m.authHandler.(interface{ Close() error }); ok {
		_ = closer.Close()
	}
	return m.agent.Close()
}

// Conversion helpers are defined in adapter.go.