feat(skills): full agentskills.io spec compliance (#71)

* feat(skills): full agentskills.io spec compliance - escape catalog XML and drop file:// prefix on <location> - skip skills missing a required description; add Skill.Validate - add license/compatibility/metadata/allowed-tools/disable-model-invocation frontmatter fields plus a malformed-YAML (unquoted colon) fallback - scan ~/.agents/skills and dedupe by name with project>user precedence - treat --skills-dir as a direct directory; add --skill-disable + DisableSkill/EnableSkill SDK methods - enumerate bundled resources via <skill_resources> on activation - add activate_skill MCP tool with enum-constrained name and session dedup - protect activated skill content from compaction pruning - gate project-local skills on a persisted trust allowlist via SkillTrustPrompt and an interactive CLI prompt - document new fields, flags, and SDK surface across README and docs site Fixes #65 * fix(skills): address skill loading and activation review findings - log (instead of discard) genuine errors from skill directory loads so permission/read failures no longer yield a silently partial catalog - make activate_skill dedup atomic by holding the lock across check and mark, preventing concurrent double-activation - reject activation of disable-model-invocation skills in the tool's runtime lookup, mirroring their catalog/enum exclusion - add regression test for disabled-skill activation
feat(sdk): harden pkg/kit embedder surface with scoped additions (#69)
2026-06-19 13:54:20 +00:00 · 2026-06-18 19:37:53 +03:00 · 2026-06-18 18:18:54 +03:00 · 2026-06-18 14:46:03 +03:00 · 2026-06-18 12:42:11 +03:00 · 2026-06-18 12:37:37 +03:00
240 changed files with 33954 additions and 6688 deletions
@@ -39,6 +39,36 @@ jobs:
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

+  # Keep floating major/minor tags (e.g. v1, v1.2) pointing at the latest
+  # release so the composite action can be referenced as `mark3labs/kit@v1`.
+  action-tags:
+    runs-on: ubuntu-latest
+    needs: goreleaser
+    if: ${{ github.event_name == 'push' && needs.goreleaser.result == 'success' }}
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Update floating major/minor tags
+        env:
+          FULL_TAG: ${{ github.ref_name }}
+        run: |
+          set -euo pipefail
+          # FULL_TAG looks like v1.2.3 — derive v1 and v1.2.
+          VER="${FULL_TAG#v}"
+          MAJOR="v${VER%%.*}"
+          MINOR="v${VER%.*}"
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          for t in "$MAJOR" "$MINOR"; do
+            echo "Pointing $t at $FULL_TAG"
+            git tag -f "$t" "$FULL_TAG"
+            git push -f origin "refs/tags/$t"
+          done
+
  npm-publish:
    runs-on: ubuntu-latest
    needs: goreleaser
@@ -1,268 +0,0 @@
-//go:build ignore
-
-package main
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"os/exec"
-	"path/filepath"
-	"strings"
-	"time"
-
-	"kit/ext"
-)
-
-const (
-	diagnosticsTimeout = 20 * time.Second
-	maxOutputBytes     = 12_000
-)
-
-type toolPathInput struct {
-	Path string `json:"path"`
-}
-
-type lintResult struct {
-	Output string
-	Err    error
-}
-
-// Package-level state: set of .go files edited during the current agent turn.
-var editedFiles map[string]bool
-
-func Init(api ext.API) {
-	api.OnSessionStart(func(_ ext.SessionStartEvent, ctx ext.Context) {
-		ctx.Print("go-edit-lint extension loaded - will run gopls and golangci-lint after agent turns that edit Go files")
-	})
-
-	// Track edited .go files — don't lint yet.
-	api.OnToolResult(func(e ext.ToolResultEvent, ctx ext.Context) *ext.ToolResultResult {
-		if e.IsError || !isEditOrWrite(e.ToolName) {
-			return nil
-		}
-
-		absPath, ok := resolveGoFilePath(e.Input, ctx.CWD)
-		if !ok {
-			return nil
-		}
-
-		if editedFiles == nil {
-			editedFiles = make(map[string]bool)
-		}
-		editedFiles[absPath] = true
-		return nil
-	})
-
-	// After the agent turn ends, lint all collected files.
-	api.OnAgentEnd(func(e ext.AgentEndEvent, ctx ext.Context) {
-		if len(editedFiles) == 0 {
-			return
-		}
-
-		// Snapshot and reset immediately so the next turn starts clean.
-		files := editedFiles
-		editedFiles = nil
-
-		// Skip lint on errored turns.
-		if e.StopReason == "error" {
-			return
-		}
-
-		// Collect unique directories and file list for gopls.
-		var allGoplsOutput []string
-		for absPath := range files {
-			res := runGopls(ctx.CWD, absPath)
-			formatted := formatToolResult(res, "")
-			if formatted != "" {
-				allGoplsOutput = append(allGoplsOutput, fmt.Sprintf("# %s\n%s", filepath.Base(absPath), formatted))
-			}
-		}
-
-		lintRes := runGolangCILint(ctx.CWD, "./...")
-
-		goplsSection := "No diagnostics."
-		if len(allGoplsOutput) > 0 {
-			goplsSection = strings.Join(allGoplsOutput, "\n\n")
-		}
-		lintSection := formatToolResult(lintRes, "No lint issues.")
-
-		// Build file list for the report header.
-		var fileNames []string
-		for absPath := range files {
-			fileNames = append(fileNames, filepath.Base(absPath))
-		}
-
-		report := fmt.Sprintf(
-			"<go_diagnostics files=%q>\n[gopls]\n%s\n\n[golangci-lint]\n%s\n</go_diagnostics>",
-			strings.Join(fileNames, ", "),
-			goplsSection,
-			lintSection,
-		)
-
-		goplsIssues, lintIssues := countIssues(report)
-		hasIssues := goplsIssues > 0 || lintIssues > 0
-
-		if hasIssues {
-			// Show TUI block so the user sees it too.
-			var msgLines []string
-			msgLines = append(msgLines, fmt.Sprintf("Files: %s", strings.Join(fileNames, ", ")))
-			if goplsIssues > 0 {
-				msgLines = append(msgLines, fmt.Sprintf("gopls: %d issue(s)", goplsIssues))
-			}
-			if lintIssues > 0 {
-				msgLines = append(msgLines, fmt.Sprintf("golangci-lint: %d issue(s)", lintIssues))
-			}
-
-			borderColor := "#f9e2af" // yellow
-			if goplsIssues > 0 && lintIssues > 0 {
-				borderColor = "#f38ba8" // red
-			}
-
-			ctx.PrintBlock(ext.PrintBlockOpts{
-				Text:        strings.Join(msgLines, "\n"),
-				BorderColor: borderColor,
-				Subtitle:    "go-edit-lint",
-			})
-
-			// Inject a follow-up message so the agent fixes the issues.
-			ctx.SendMessage(report + "\n\n⚠️ DIAGNOSTICS FOUND: Please review and fix the issues above.")
-		} else {
-			ctx.PrintBlock(ext.PrintBlockOpts{
-				Text:        fmt.Sprintf("Files: %s\n✓ All clean", strings.Join(fileNames, ", ")),
-				BorderColor: "#a6e3a1",
-				Subtitle:    "go-edit-lint",
-			})
-		}
-	})
-}
-
-func isEditOrWrite(toolName string) bool {
-	return strings.EqualFold(toolName, "edit") || strings.EqualFold(toolName, "write")
-}
-
-func resolveGoFilePath(inputJSON, cwd string) (string, bool) {
-	var args toolPathInput
-	if err := json.Unmarshal([]byte(inputJSON), &args); err != nil || args.Path == "" {
-		return "", false
-	}
-
-	absPath := args.Path
-	if !filepath.IsAbs(absPath) {
-		absPath = filepath.Join(cwd, absPath)
-	}
-
-	if strings.ToLower(filepath.Ext(absPath)) != ".go" {
-		return "", false
-	}
-
-	return absPath, true
-}
-
-func runGopls(cwd, absPath string) lintResult {
-	ctx, cancel := context.WithTimeout(context.Background(), diagnosticsTimeout)
-	defer cancel()
-
-	cmd := exec.CommandContext(ctx, "gopls", "check", absPath)
-	cmd.Dir = cwd
-	out, err := cmd.CombinedOutput()
-
-	if ctx.Err() == context.DeadlineExceeded {
-		return lintResult{Err: fmt.Errorf("timed out after %s", diagnosticsTimeout)}
-	}
-
-	if err != nil {
-		return lintResult{Output: truncate(string(out), maxOutputBytes), Err: fmt.Errorf("failed to run gopls check: %w", err)}
-	}
-
-	return lintResult{Output: truncate(string(out), maxOutputBytes)}
-}
-
-func runGolangCILint(cwd, target string) lintResult {
-	ctx, cancel := context.WithTimeout(context.Background(), diagnosticsTimeout)
-	defer cancel()
-
-	args := []string{
-		"run",
-		target,
-		"--show-stats=false",
-		"--output.text.path", "stdout",
-		"--output.text.colors=false",
-		"--output.text.print-issued-lines=false",
-	}
-	cmd := exec.CommandContext(ctx, "golangci-lint", args...)
-	cmd.Dir = cwd
-	out, err := cmd.CombinedOutput()
-
-	if ctx.Err() == context.DeadlineExceeded {
-		return lintResult{Err: fmt.Errorf("timed out after %s", diagnosticsTimeout)}
-	}
-
-	trimmed := truncate(string(out), maxOutputBytes)
-	if err == nil {
-		return lintResult{Output: trimmed}
-	}
-
-	exitErr, ok := err.(*exec.ExitError)
-	if ok && exitErr.ExitCode() == 1 {
-		return lintResult{Output: trimmed}
-	}
-
-	return lintResult{Output: trimmed, Err: fmt.Errorf("failed to run golangci-lint: %w", err)}
-}
-
-func formatToolResult(res lintResult, emptyFallback string) string {
-	var lines []string
-	if res.Err != nil {
-		lines = append(lines, "ERROR: "+res.Err.Error())
-	}
-	out := strings.TrimSpace(res.Output)
-	if out == "" {
-		if res.Err == nil {
-			if emptyFallback != "" {
-				lines = append(lines, emptyFallback)
-			}
-		}
-	} else {
-		lines = append(lines, out)
-	}
-	if len(lines) == 0 {
-		return emptyFallback
-	}
-	return strings.Join(lines, "\n")
-}
-
-func truncate(s string, max int) string {
-	if len(s) <= max {
-		return s
-	}
-	return s[:max] + "\n... output truncated ..."
-}
-
-func countIssues(report string) (goplsCount, lintCount int) {
-	goplsStart := strings.Index(report, "[gopls]")
-	lintStart := strings.Index(report, "[golangci-lint]")
-	endTag := strings.Index(report, "</go_diagnostics>")
-
-	if goplsStart != -1 && lintStart != -1 {
-		goplsSection := report[goplsStart:lintStart]
-		for _, line := range strings.Split(goplsSection, "\n") {
-			line = strings.TrimSpace(line)
-			if line != "" && line != "[gopls]" && line != "No diagnostics." && !strings.HasPrefix(line, "#") {
-				goplsCount++
-			}
-		}
-	}
-
-	if lintStart != -1 && endTag != -1 {
-		lintSection := report[lintStart:endTag]
-		for _, line := range strings.Split(lintSection, "\n") {
-			line = strings.TrimSpace(line)
-			if line != "" && line != "[golangci-lint]" && line != "No lint issues." {
-				lintCount++
-			}
-		}
-	}
-
-	return goplsCount, lintCount
-}
@@ -0,0 +1,146 @@
+---
+description: Read-only audit for dead code, duplication, boundary violations, and refactor opportunities
+---
+
+Perform a comprehensive **read-only** audit of this repository and report
+findings. **Do not edit, rename, or delete any files.** Optional focus / scope
+hints from the user: $@
+
+## Scope
+
+If the user supplied focus hints above (a package path, a subsystem name, a
+concern like "TUI" or "extensions"), scope the audit accordingly. Otherwise
+audit the whole repo, prioritising the highest-traffic packages first
+(`cmd/`, `internal/`, `pkg/kit/` for this repo).
+
+## Steps
+
+1. **Map the repo first**:
+   - `ls` / `find` the top-level layout and list every Go package
+   - Read `AGENTS.md`, `README.md`, and any `pkg/*/doc.go` to understand the
+     intended architectural boundaries (SDK vs internal vs TUI vs cmd vs
+     extension surface)
+   - Note the public SDK surface (`pkg/kit/`) and any documented invariants
+     (e.g. "no dependency name leakage", "UI never imports extensions
+     directly") — these define what counts as a violation
+
+2. **Hunt for dead code**:
+   - Run `go vet ./...` and capture warnings
+   - Use `grep` to find exported symbols (`^func [A-Z]`, `^type [A-Z]`,
+     `^var [A-Z]`, `^const [A-Z]`) and cross-reference call sites. Symbols
+     with zero non-test references inside the module are suspects
+   - Check for unreferenced files, `// TODO: remove` markers, commented-out
+     blocks, and `_ = x` discard patterns
+   - If `staticcheck`, `deadcode`, or `unused` are available on PATH, run
+     them and include their output verbatim
+   - **Do not delete anything** — list candidates with file:line and a
+     confidence level (high / medium / low)
+
+3. **Find unnecessary duplication**:
+   - Look for near-identical function bodies, struct shapes, or switch
+     statements across packages — `grep` for repeated function signatures
+     and copy-pasted string literals / error messages is a fast first pass
+   - Distinguish *coincidental* duplication (two things that happen to look
+     alike but evolve independently) from *unnecessary* duplication (same
+     intent, drifting in lockstep) — only flag the latter
+   - For each cluster, propose where the extracted helper should live
+     (which package, which file) and whether it crosses a boundary
+
+4. **Check concerns / boundary violations**:
+   - **SDK leakage**: grep `pkg/kit/` for imports of `internal/...` types
+     in exported signatures, and for dependency-name leakage in exported
+     names / godoc (e.g. library jargon appearing in `LLM*` types)
+   - **UI ↔ extensions**: grep `internal/ui/` for any import of
+     `internal/extensions/` — per AGENTS.md the UI must not import
+     extensions directly; converters in `cmd/root.go` should bridge them
+   - **cmd vs internal**: business logic living in `cmd/` that should be
+     in `internal/` (and vice versa)
+   - **Cyclic risk**: packages one import away from a cycle (Go rejects real
+     import cycles at build time) or that reach across sibling boundaries unexpectedly
+   - For each violation, cite the offending import / signature with
+     file:line
+
+5. **Spot refactor opportunities**:
+   - Long functions (>80 lines) doing multiple unrelated things
+   - Deeply nested conditionals that flatten well with early returns
+   - Repeated `if err != nil { return fmt.Errorf("...: %w", err) }` chains
+     that could become helpers — but only where the wrapping context is
+     genuinely uniform
+   - Structs with too many fields that hint at split responsibilities
+   - Exported APIs that would be cleaner with options structs / functional
+     options
+   - Tests that share setup boilerplate ripe for a helper
+   - Flag each with: location, current shape (1-2 lines), proposed shape
+     (1-2 lines), and estimated risk (low / medium / high)
+
+6. **Cross-check against project rules**:
+   - Re-read `AGENTS.md` "Key Patterns" section and verify nothing in your
+     findings contradicts the documented gotchas (Yaegi interface ban,
+     `prog.Send()` from `Update()`, function-field bug, etc.) — if a
+     "refactor" would reintroduce a known pitfall, drop it from the report
+     and note why
+
+7. **Write the report** as your final message (do not write it to disk)
+   structured as:
+
+   ```
+   # Code Audit Report
+
+   ## Summary
+   - N dead-code candidates
+   - N duplication clusters
+   - N boundary violations
+   - N refactor opportunities
+
+   ## Dead Code
+   ### High confidence
+   - path/to/file.go:LINE — symbol — reason
+
+   ### Medium confidence
+   ...
+
+   ## Duplication
+   ### Cluster: <short name>
+   - Sites: file:line, file:line, …
+   - Suggested home: package/path
+   - Notes: …
+
+   ## Boundary Violations
+   - Rule: <which rule from AGENTS.md / project convention>
+   - Offender: file:line
+   - Fix sketch: …
+
+   ## Refactor Opportunities
+   - Location: file:line
+   - Current: …
+   - Proposed: …
+   - Risk: low/medium/high
+   - Why it's worth it: …
+
+   ## Suggested Next Steps
+   1. …
+   2. …
+   ```
+
+8. **End the report with an explicit reminder** that no files were modified,
+   and recommend the user pick the highest-leverage items to act on
+   manually (or via a follow-up `/fix-issue` style prompt) rather than
+   running a sweeping refactor.
+
+## Guidelines
+
+- **Read-only, always**: no `edit`, no `write`, no `git commit`, no `go mod
+  tidy`. Use only `read`, `grep`, `find`, `ls`, and read-only `bash`
+  commands (`go vet`, `go build -o /tmp/...`, `staticcheck`, etc.)
+- **Cite every finding** with `path/to/file.go:LINE` so the user can jump
+  straight to it
+- **Be honest about confidence**: false positives in a code audit are
+  expensive — prefer "medium confidence, worth a look" over confidently
+  wrong claims
+- **Quantity isn't quality**: 10 sharp findings beat 100 nitpicks. Cut
+  anything that's purely stylistic unless it directly causes one of the
+  four issue categories above
+- **Skip generated code** (`*.pb.go`, `*_gen.go`, anything under
+  `vendor/`) and obvious third-party copies
+- **Don't propose architectural rewrites** — stay within the existing
+  shape of the repo and recommend incremental, reviewable changes
@@ -0,0 +1,47 @@
+---
+description: Open a GitHub PR for the current branch using the repo's PR template
+---
+
+Open a GitHub pull request for the current branch, filling out the repository's PR template with a description grounded in the actual commits and diff.
+
+## Steps
+
+1. **Verify the branch is pushed**:
+   - `git status -sb` and `git log @{u}..HEAD --oneline 2>/dev/null` — if there is no upstream or there are unpushed commits, run `git push -u origin "$(git branch --show-current)"` first
+   - If the working tree is dirty, stop and tell the user to commit first (suggest `/commit-push`)
+2. **Gather context**:
+   - `git log origin/main..HEAD --oneline` — list of commits going into the PR
+   - `git diff origin/main...HEAD --stat` then `git diff origin/main...HEAD` — read the actual changes
+   - Identify the linked issue (from commit messages, branch name, or extra user input: $@) — capture as `Fixes #N` if applicable
+3. **Locate the PR template**:
+   - Check `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, or `docs/pull_request_template.md`
+   - If none exists, use a minimal `## Description` / `## Type of Change` / `## Checklist` structure
+4. **Draft the PR body** by filling out the template:
+   - **Description**: 1–3 short paragraphs explaining *what* changed and *why*, grounded in the diff. Include a brief before/after example for new APIs when useful.
+   - **Fixes #N**: only if there is a real linked issue
+   - **Type of Change**: tick the single most accurate box with `[x]` (leave others as `[ ]`)
+   - **Checklist**: tick items that are genuinely true (style, self-review, tests added, docs updated)
+   - **Additional Information**: bullet list of added / modified files and any backward-compatibility notes
+   - Remove template sections explicitly marked "remove if not applicable" (e.g. MCP Spec Compliance) when they don't apply
+5. **Write the body to a temp file**: `/tmp/pr-body-<branch-or-issue>.md` — never inline a long body via `--body`, always use `--body-file`
+6. **Choose the title**: prefer the subject of the primary commit if it already follows Conventional Commits; otherwise craft one in the same style (`<type>(<scope>): <imperative summary>`, ≤72 chars)
+7. **Create the PR**:
+   ```
+   gh pr create \
+     --title "<title>" \
+     --body-file /tmp/pr-body-<...>.md \
+     --base main \
+     --head "$(git branch --show-current)"
+   ```
+   Use the repo's actual default branch if it isn't `main` (`gh repo view --json defaultBranchRef -q .defaultBranchRef.name`)
+8. **Report the PR URL** returned by `gh` and stop
+
+## Guidelines
+
+- Read the diff and commit messages — do **not** invent features that aren't in the code
+- One PR per logical change; if the branch contains unrelated commits, surface that and ask before continuing
+- Keep the description focused on reviewer-relevant information (what / why), not a replay of the diff
+- Only check checklist boxes that are actually satisfied; leave the rest unchecked rather than lying
+- If `gh` is not authenticated (`gh auth status` fails), stop and tell the user
+
+$@
@@ -16,7 +16,7 @@ This prompt uses the `feature_request` GitHub template which requires:

 ## Steps

-1. **Understand the request** from `$@`
+1. **Understand the request** from the user input: $@
   - What capability is missing?
   - What would the ideal behavior look like?

@@ -16,7 +16,7 @@ This repository has structured issue templates. You MUST use the appropriate tem

 ## Steps

-1. **Determine the issue type** from `$@`:
+1. **Determine the issue type** from the user input: $@
   - Bug → use `--template bug_report`
   - Feature → use `--template feature_request`  
   - Documentation → use `--template documentation`
@@ -0,0 +1,61 @@
+---
+description: Implement the fix/feature/docs change requested by a GitHub issue
+---
+
+Resolve GitHub issue #$1 by reading it, classifying it, and producing the appropriate code or doc change. **Stop once the working tree contains the change** — committing, pushing, and opening a PR are handled by `/commit-push` and `/create-pr`.
+
+## Steps
+
+1. **Fetch the issue**:
+   - Run: gh issue view $1 --json number,title,body,labels,state,author,comments
+   - If the issue is closed, stop and ask the user whether to proceed
+   - Read the **entire** thread including comments — the latest comment often refines the ask
+
+2. **Classify the issue** from labels, title prefix, and body content:
+   - `bug` / `fix:` → reproduce, then fix
+   - `enhancement` / `feature` / `feat:` → design, then implement
+   - `documentation` / `docs:` → locate and update docs
+   - `question` / `discussion` → answer in a comment, do **not** write code
+   - Anything else → ask the user how to proceed
+
+3. **Create a working branch** off the default branch:
+   - `git checkout main && git pull --ff-only`
+   - Branch name: <type>/$1-<slug> (e.g. `fix/42-borderColor-ignored`, `feat/57-keyboard-clear`, `docs/63-widget-lifecycle`)
+
+4. **Do the work** based on type:
+
+   ### Bug (`bug` label / `fix:` title)
+   - Reproduce the failure first (write a failing test if feasible) — if you cannot reproduce, comment on the issue asking for clarification and stop
+   - Locate the root cause; do not patch symptoms
+   - Add or extend a regression test that fails before and passes after the fix
+   - Run `go test ./... -race` and `golangci-lint run`
+
+   ### Feature (`enhancement` / `feature` label / `feat:` title)
+   - Re-read the motivation and proposed implementation in the issue body
+   - For large, ambiguous, or breaking changes, sketch the design in a comment on the issue and wait for sign-off before writing code
+   - Implement behind sensible defaults; add godoc on every exported symbol
+   - Add unit tests covering the new behaviour and edge cases
+   - Update `README.md` / `docs/` if the public surface changed
+   - Run `go test ./... -race` and `golangci-lint run`
+
+   ### Documentation (`documentation` label / `docs:` title)
+   - Open the file/URL referenced in the issue's "Documentation Location"
+   - Apply the suggested improvement; verify code samples compile (`go build ./...`)
+   - No tests required, but run `golangci-lint run` if Go files were touched
+
+5. **Report**:
+   - Branch name (`git branch --show-current`)
+   - Summary of files changed (`git status -s`) and the diff highlights
+   - Test/lint results (pass/fail with key output)
+   - Suggest the next step explicitly:
+     - `/commit-push` to commit with a Conventional Commit subject (the message should reference (#$1) and include "Fixes #$1" so the merge auto-closes the issue)
+     - then /create-pr $1 to open the pull request
+
+## Guidelines
+
+- This prompt **stops once the change is applied to the working tree, still uncommitted** — do not run `git commit`, `git push`, or `gh pr create`
+- If the issue is unclear, post a clarifying comment on the issue and stop; do not guess
+- Keep the change scoped to the issue; surface unrelated cleanups separately
+- For breaking changes or architecture shifts, propose the design on the issue first and wait for maintainer sign-off
+- If the issue is a duplicate or already fixed on `main`, comment with the reference and stop
+- Do not close the issue manually — the eventual PR's "Fixes #$1" line handles that on merge
@@ -16,28 +16,64 @@ It becomes a `/slug` slash command in the kit input box — typed as `/filename`
 description: One-line description shown in autocomplete
 ---

-Body text of the prompt. Use $@ for all user-supplied arguments,
-$1 $2 etc. for positional arguments.
+Body text of the prompt. Reference user-supplied arguments
+with positional placeholders (see "Argument placeholders" below).
 ```

 - **Filename** → slug: `commit-push.md` becomes `/commit-push`
 - **Frontmatter**: only `description` is recognised; keep it under ~80 chars
 - **Body**: plain markdown; the full text is submitted as the user's message when the template fires
- **Arguments**: `$@` expands to everything the user typed after the slash command name;
-  `$1`, `$2` for individual positional args; omit entirely if no arguments are needed
+- **Required args**: kit infers required positional args from the highest `$N` it finds *outside* backtick/tilde code fences — a stray `$2` in active prose means kit will refuse to run without 2 arguments
+
+## Argument placeholders
+
+kit performs shell-style substitution before sending the prompt to the model:
+
+- `$1`, `$2`, … — positional arguments (1-indexed)
+- `${1}`, `${2}`, … — same, brace form (use when followed by digits/letters: `${1}_suffix`)
+- `$@` — all arguments joined by spaces (zero or more, optional)
+- `$+` — all arguments, **at least one required**
+- `$ARGUMENTS` / `${ARGUMENTS}` — alias for `$@`
+- `${@:N}` — args from the Nth onwards (1-indexed, bash-style)
+- `${@:N:L}` — `L` args starting from the Nth
+
+### ⚠️ Critical: code fences and inline code preserve placeholders verbatim
+
+Anything inside triple-backtick fences, `~~~` fences, or single-backtick `inline` code spans is **left untouched** so example code samples don't get corrupted. That means:
+
+- An inline-coded `gh issue view $1` stays literal `$1` in the model's input ❌
+- The same command without backticks: gh issue view $1 → expands to `gh issue view 42` ✓
+
+**Rule of thumb:** if you want a placeholder to substitute, keep it outside backticks and fences. If you want a literal `$1` in the output (e.g. teaching the user shell syntax), put it inside backticks.
+
+### Workarounds for "I want it to look like code AND substitute"
+
+1. **Drop the backticks** around just the placeholder portion — the rest can still read as a command line in prose
+2. **Use a 4-space-indented code block** instead of a triple-backtick fence — kit only skips backtick/tilde fences, so indentation-style code blocks still get substitution:
+
+       git push -u origin "$(git branch --show-current)"
+       gh pr create --title "fix: ... (#$1)" --base main
+
+3. **Bind once, reference loosely**: put `Issue: $1` at the top in prose, then leave the backticked examples literal — the model will substitute mentally

 ## Steps

-1. **Understand the workflow** the user described in `$@` — ask a clarifying question if the intent is ambiguous
+1. **Understand the workflow** the user described in $@ — ask a clarifying question if the intent is ambiguous
 2. **Choose a filename**: short, lowercase, hyphen-separated, descriptive (e.g. `code-review.md`)
 3. **Write the description**: one sentence, imperative, fits in autocomplete
-4. **Draft the body**:
-   - Open with a single sentence stating the goal
+4. **Decide on arguments**:
+   - No args needed → omit placeholders entirely
+   - One required value (issue number, PR url, file path) → use `$1`
+   - Free-form trailing context → end with a single `$@` line
+   - Multiple distinct values → use `$1`, `$2`, … and document each at the top
+5. **Draft the body**:
+   - Open with a single sentence stating the goal, weaving in `$1`/`$@` where the value belongs
   - Use `## Steps` for multi-step workflows; use plain prose for simple prompts
   - Be specific: name commands, flags, and file paths where relevant
-   - End with `$@` on its own line if the user might want to pass context or a hint; omit if the prompt is self-contained
-5. **Write the file** to `.kit/prompts/<slug>.md`
-6. **Confirm** by showing the final file content and the slash command that activates it
+   - **Audit every backtick and code fence**: any `$N` or `$@` inside them will not expand — was that intentional? If not, apply one of the workarounds above
+6. **Write the file** to `.kit/prompts/<slug>.md`
+7. **Verify substitution** by mentally (or actually) replacing `$1`/`$@` with a sample value and confirming every reference resolves — and that the prompt's *own* example snippets don't accidentally bump the required-arg count (wrap illustrative `$N` examples in triple-backtick fences, not 4-space indentation, so `RequiredArgs()` ignores them)
+8. **Confirm** by showing the final file content and the slash command that activates it (e.g. `/code-review 42`)

 ## Guidelines

@@ -45,3 +81,4 @@ $1 $2 etc. for positional arguments.
 - Prefer concrete steps over vague instructions
 - A prompt that does one thing well beats one that tries to cover every edge case
 - If the workflow already exists as a prompt, suggest extending it instead of duplicating
+- When in doubt about substitution behaviour, write the file and run `/<slug> testvalue` once to confirm — wrong placement of backticks is the #1 failure mode
@@ -0,0 +1,52 @@
+---
+description: Audit and update project documentation (README and docs site) for a recent change
+---
+
+Review recent code changes, identify all documentation surfaces that should
+mention them, and update each one — grounded in the actual diff, not guesses.
+
+## Steps
+
+1. **Identify the change**:
+   - If the user input ($@) names a commit / PR / branch / topic, use that as the focus
+   - Otherwise inspect `git log origin/main..HEAD --oneline` and `git diff origin/main...HEAD --stat` to discover what shipped on the current branch
+   - Read the actual diff (`git diff origin/main...HEAD`) — never document features that aren't in the code
+
+2. **Inventory the doc surfaces**:
+   - `README.md` at the repo root
+   - Any docs site (commonly `www/`, `docs/`, `site/`) — list its pages and identify the one(s) most thematically related to the change
+   - Inline godoc / API reference comments on the new exported symbols
+   - `CHANGELOG.md` if the project keeps one
+   - Any `examples/` directory entries that demonstrate the affected area
+
+3. **Audit each surface** with `grep`:
+   - Search for the names of related existing APIs (e.g. if you added `IterTools`, grep for `ListTools`) to find every page that already discusses the area
+   - Decide for each hit: does it need a cross-reference, a side-by-side comparison, or to stay untouched?
+
+4. **Decide where new content lives**:
+   - Prefer extending an existing page over creating a new one
+   - For a docs site, place new sections near related content (check the page's `## Heading` outline first)
+   - Skip surfaces that genuinely don't apply (e.g. a server-focused README for a client-only change) and say so explicitly
+
+5. **Draft the updates**:
+   - Lead with a one-sentence statement of what's new and why
+   - Show concrete code examples copied from real signatures — verify against the source files
+   - Include a comparison / "when to use which" table when adding an alternative to an existing API
+   - Note backwards-compatibility behaviour if relevant
+
+6. **Verify the docs build** before committing:
+   - For vocs / docusaurus / mkdocs sites, run the local build command (e.g. `npx vocs build`, `mkdocs build`) and fix any MDX/markdown errors
+   - For godoc, run `go vet ./...` and `go doc <pkg> <Symbol>` to sanity-check rendering
+
+7. **Report**:
+   - List every file changed and every file deliberately left alone (with a one-line reason)
+   - Suggest the next step (typically `/commit-push`) — do not auto-commit unless asked
+
+## Guidelines
+
+- Read the diff before writing anything — invented API names erode trust faster than missing docs
+- One change per doc commit; keep doc updates separate from code changes when possible
+- Match the existing voice and formatting of each surface (headings, code-fence languages, table styles)
+- Prefer linking between pages over duplicating content
+
+$@
@@ -1,8 +0,0 @@
-{
-  "$schema": "https://opencode.ai/config.json",
-  "permission": {
-    "external_directory": {
-      "~/go/**": "deny"
-    }
-  }
-}
@@ -1,80 +0,0 @@
-# Autoscroll Fix - Final Summary
-
-## Root Cause
-
-The autoscroll was failing for streaming assistant messages due to a bug in how `GotoBottom()` calculated item heights.
-
-### The Problem
-
-1. **Reasoning blocks** (`StreamingMessageItem` with `role="reasoning"`) are **never cached** because they have live duration counters that update every render
-2. The `Height()` method returns `0` when `cachedRender == ""`
-3. `GotoBottom()` was calling:
-   ```go
-   itemHeight := item.Height()  // Returns 0 for reasoning
-   if itemHeight == 0 {
-       item.Render(s.width)  // Renders but doesn't cache (reasoning)
-       itemHeight = item.Height()  // Still returns 0!
-   }
-   ```
-4. This caused incorrect scroll position calculations, especially during reasoning → assistant transitions
-
-## The Solution
-
-Changed `GotoBottom()` and `AtBottom()` to calculate height **directly from the rendered string** instead of relying on the cached height:
-
-```go
-// OLD: item.Height() which checks cached render
-itemHeight := item.Height()
-if itemHeight == 0 {
-    item.Render(s.width)
-    itemHeight = item.Height()  // Still might be 0!
-}
-
-// NEW: Calculate from rendered string directly
-rendered := item.Render(s.width)
-itemHeight := strings.Count(rendered, "\n") + 1
-```
-
-This works for **all** items regardless of whether they cache their render or not.
-
-## Files Changed
-
-### `internal/ui/scrolllist.go`
- **`GotoBottom()`**: Calculate height from rendered string (2 loops)
- **`AtBottom()`**: Calculate height from rendered string (1 loop)
-
-### `internal/ui/model.go`
- **`appendStreamingChunk()`**: For existing messages, call `GotoBottom()` directly (iteratr pattern)
- **`refreshContent()`**: Simplified to only call `SetItems()` (removed redundant `GotoBottom()`)
- **Bash streaming handler**: Removed redundant `GotoBottom()` after `refreshContent()`
-
-## Testing Results
-
-✅ **Test prompt**: "explore this repo"
-
-**Before fix**:
- Autoscroll stopped after reasoning block completed
- Viewport stuck showing end of reasoning ("Thought for 203ms")
- Assistant response streamed off-screen below
-
-**After fix**:
- Autoscroll works throughout reasoning block
- Autoscroll continues during reasoning → assistant transition  
- Viewport stays at bottom showing latest assistant content
- Final position shows end of response (build commands section)
-
-## Behavior Verified
-
-1. ✅ Streaming text auto-scrolls to bottom
-2. ✅ Works across reasoning → assistant transition
-3. ✅ Manual scroll up (PgUp) disables autoscroll
-4. ✅ Scroll to bottom (Alt+End) re-enables autoscroll
-5. ✅ Accurate positioning with no offset errors
-
-## Performance Note
-
-The fix calls `Render()` on all items during `GotoBottom()` calculations. This is acceptable because:
- `Render()` is already optimized with caching for non-reasoning items
- `GotoBottom()` is only called during content updates (not every frame)
- Reasoning blocks need to render anyway for live duration updates
- This matches iteratr's approach of ensuring items are rendered before height calculations
@@ -18,7 +18,8 @@ A powerful, extensible AI coding agent CLI with multi-provider support, built-in
 ## Features

 - **Multi-Provider LLM Support**: Anthropic, OpenAI, Google Gemini, Ollama, Azure OpenAI, AWS Bedrock, OpenRouter, and more
- **Built-in Core Tools**: bash, read, write, edit, grep, find, ls, subagent - no MCP overhead
+- **Built-in Core Tools**: bash (with interactive sudo password prompt), read, write, edit, grep, find, ls, subagent - no MCP overhead
+- **Smart @ Attachments**: Binary files auto-detected via MIME type, MCP resources via `@mcp:server:uri`
 - **MCP Integration**: Connect external MCP servers for expanded capabilities
 - **Extension System**: Write custom tools, commands, widgets, and UI modifications in Go
 - **Theming**: 22 built-in color themes (KITT, Catppuccin, Dracula, Nord, etc.) with runtime switching, persistence, and custom theme files
@@ -27,8 +28,9 @@ A powerful, extensible AI coding agent CLI with multi-provider support, built-in
 - **Interactive TUI**: Rich terminal interface powered by Bubble Tea with streaming, syntax highlighting, and custom rendering
 - **Session Management**: Tree-based conversation history with branching support
 - **Non-Interactive Mode**: Script-friendly positional args with JSON output
+- **GitHub Integration**: Scaffold a GitHub Actions workflow with `kit github install` to run Kit as a collaborator/reviewer on `/kit` comments
 - **ACP Server**: Run Kit as an [Agent Client Protocol](https://agentclientprotocol.com) agent over stdio
- **Go SDK**: Embed Kit in your own applications
+- **Go SDK**: Embed Kit in your own applications with full agent lifecycle events (30+ event types) and behavior-modifying hooks

 ## Installation

@@ -125,8 +127,22 @@ model: anthropic/claude-sonnet-latest
 max-tokens: 4096
 temperature: 0.7
 stream: true
+thinking-level: off       # off, none, minimal, low, medium, high
+no-core-tools: false      # set to true to disable all built-in core tools
+
+# Skills — all keys are optional
+no-skills: false          # set to true to disable all skill loading
+skill:                    # explicit skill files/dirs (disables auto-discovery)
+  - /path/to/skill.md
+skills-dir: ""            # scan this directory directly for skills (overrides auto-discovery)
+skill-disable:            # hide skills from the model catalog by name (still usable via /skill:)
+  - some-skill
 ```

+All of the above keys can also be set programmatically via the SDK
+(`kit.Options.MaxTokens`, `kit.Options.Temperature`, `kit.Options.ThinkingLevel`, etc.)
+without touching config files — see [SDK options](#with-options).
+
 ### Environment Variables

 ```bash
@@ -151,6 +167,16 @@ mcpServers:
  search:
    type: remote
    url: "https://mcp.example.com/search"
+
+  pubmed:
+    type: remote
+    url: "https://pubmed.mcp.example.com"
+    noOAuth: true  # skip OAuth for public servers that don't require auth
+
+  builds:
+    type: remote
+    url: "https://builds.mcp.example.com"
+    tasksMode: always  # async task execution — see MCP Tasks below
 ```

 ## CLI Reference
@@ -179,19 +205,28 @@ mcpServers:
 --compact                Enable compact output mode
 --auto-compact           Auto-compact conversation near context limit

-# Extensions
+# Extensions and tools
 --extension, -e          Load additional extension file(s) (repeatable)
 --no-extensions          Disable all extensions
+--no-core-tools          Disable all built-in core tools (bash, read, write, edit, grep, find, ls, subagent)
 --prompt-template        Load a specific prompt template by name
 --no-prompt-templates    Disable prompt template loading

+# Skills
+--skill                  Load skill file or directory (repeatable)
+--skills-dir             Scan this directory directly for skills (overrides auto-discovery)
+--skill-disable          Hide a skill from the model catalog by name (repeatable); still usable via /skill:
+--no-skills              Disable skill loading (auto-discovery and explicit)
+
 # Generation parameters
--max-tokens             Maximum tokens in response (default: 4096)
+--max-tokens             Maximum tokens in response (default: 8192, auto-raised up to 32768 for models with larger known output limits)
 --temperature            Randomness 0.0-1.0 (default: 0.7)
 --top-p                  Nucleus sampling 0.0-1.0 (default: 0.95)
 --top-k                  Limit top K tokens (default: 40)
 --stop-sequences         Custom stop sequences (comma-separated)
--thinking-level         Extended thinking level: off, minimal, low, medium, high (default: off)
+--frequency-penalty      Penalize frequent tokens 0.0-2.0 (default: 0.0)
+--presence-penalty       Penalize present tokens 0.0-2.0 (default: 0.0)
+--thinking-level         Extended thinking level: off, none, minimal, low, medium, high (default: off)

 # System
 --config                 Config file path (default: ~/.kit.yml)
@@ -203,9 +238,14 @@ mcpServers:

 ```bash
 # Authentication (for OAuth-enabled providers)
-kit auth login [provider]    # Start OAuth flow (e.g., anthropic)
-kit auth logout [provider]   # Remove credentials for provider
-kit auth status              # Check authentication status
+kit auth login [provider]          # Start OAuth flow (e.g., anthropic)
+kit auth login [provider] --set-default  # Set provider's default model as system default
+kit auth logout [provider]         # Remove credentials for provider
+kit auth status                    # Check authentication status
+
+# GitHub Copilot login (experimental; requires active Copilot subscription)
+kit auth login copilot
+kit --model copilot/gpt-5.5 "Hello"

 # Model database
 kit models [provider]        # List available models (optionally filter by provider)
@@ -224,6 +264,12 @@ kit install --uninstall <pkg> # Remove an installed package
 # Skills
 kit skill                    # Install the Kit extensions skill via skills.sh

+# GitHub integration
+kit github install           # Scaffold .github/workflows/kit.yml (run Kit on '/kit' comments)
+kit github install --model anthropic/claude-sonnet-4-5-20250929
+kit github install --force   # Overwrite an existing workflow file
+kit github install --no-secret # Skip the offer to set the provider secret via the gh CLI
+
 # ACP server
 kit acp                      # Start as ACP agent (stdio JSON-RPC)
 kit acp --debug              # With debug logging to stderr
@@ -287,12 +333,15 @@ kit -e examples/extensions/minimal.go

 ### Extension Capabilities

-**Lifecycle Events**: OnSessionStart, OnSessionShutdown, OnBeforeAgentStart, OnAgentStart, OnAgentEnd, OnToolCall, OnToolExecutionStart, OnToolOutput, OnToolExecutionEnd, OnToolResult, OnInput, OnMessageStart, OnMessageUpdate, OnMessageEnd, OnModelChange, OnContextPrepare, OnBeforeFork, OnBeforeSessionSwitch, OnBeforeCompact, OnCustomEvent, OnSubagentStart, OnSubagentChunk, OnSubagentEnd
+**Lifecycle Events**: OnSessionStart, OnSessionShutdown, OnBeforeAgentStart, OnAgentStart, OnAgentEnd, OnLLMUsage, OnToolCall, OnToolCallInputStart, OnToolCallInputDelta, OnToolCallInputEnd, OnToolExecutionStart, OnToolOutput, OnToolExecutionEnd, OnToolResult, OnInput, OnMessageStart, OnMessageUpdate, OnMessageEnd, OnModelChange, OnContextPrepare, OnBeforeFork, OnBeforeSessionSwitch, OnBeforeCompact, OnCustomEvent, OnSubagentStart, OnSubagentChunk, OnSubagentEnd
+
+`OnAgentEnd` carries per-turn aggregates (`ToolCallCount`, `ToolNames`, `LLMCallCount`, `InputTokensDelta`, `OutputTokensDelta`, `CostDelta`, `DurationMs`) so observers don't need to maintain parallel bookkeeping. `OnLLMUsage` fires after each LLM provider call with token and cost deltas attributed to that specific call/model — use it for accurate budget enforcement *between* calls instead of waiting for the turn to finish.

 **Custom Components**:
 - **Tools**: Add new tools the LLM can invoke
 - **Commands**: Register slash commands (e.g., `/mycommand`)
 - **Options**: Register configurable extension options
+- **Session State**: Last-write-wins key-value store via `ctx.SetState` / `GetState` / `DeleteState` / `ListState`, persisted to a per-session sidecar file outside the conversation tree
 - **Widgets**: Persistent status displays above/below input
 - **Headers/Footers**: Persistent content above/below the conversation
 - **Status Bar**: Custom status bar entries
@@ -317,39 +366,41 @@ kit -e examples/extensions/minimal.go

 See the `examples/extensions/` directory:

- `minimal.go` - Clean UI with custom footer
- `auto-commit.go` - Auto-commit on shutdown
- `bookmark.go` - Bookmark conversations
- `branded-output.go` - Branded output rendering
- `compact-notify.go` - Notification on compaction
- `confirm-destructive.go` - Confirm destructive operations
- `context-inject.go` - Inject context into conversations
- `conversation-manager.go` - **NEW** Tree navigation, branch summarization, and fresh context loops
- `custom-editor-demo.go` - Vim-like modal editor
- `dev-reload.go` - Development live-reload
- `header-footer-demo.go` - Custom headers and footers
- `inline-bash.go` - Inline bash execution
- `interactive-shell.go` - Interactive shell integration
- `kit-kit.go` - Kit-in-Kit (sub-agent spawning)
- `lsp-diagnostics.go` - LSP diagnostic integration
- `notify.go` - Desktop notifications
- `overlay-demo.go` - Modal dialogs
- `permission-gate.go` - Permission gating for tools
- `pirate.go` - Pirate-themed personality
- `plan-mode.go` - Read-only planning mode
- `project-rules.go` - Project-specific rules
- `prompt-demo.go` - Interactive prompts (select/confirm/input)
- `prompt-templates.go` - **NEW** Frontmatter-driven templates with model switching and skill injection
- `protected-paths.go` - Path protection for sensitive files
- `subagent-widget.go` - Multi-agent orchestration with status widget
- `subagent-test.go` - Subagent testing utilities
- `summarize.go` - Conversation summarization
- `tool-logger.go` - Log all tool calls
- `neon-theme.go` - Custom theme registration and switching
- `tool-renderer-demo.go` - Custom tool call rendering
- `widget-status.go` - Persistent status widgets
+- [`minimal.go`](examples/extensions/minimal.go) - Clean UI with custom footer
+- [`auto-commit.go`](examples/extensions/auto-commit.go) - Auto-commit on shutdown
+- [`bookmark.go`](examples/extensions/bookmark.go) - Bookmark conversations
+- [`branded-output.go`](examples/extensions/branded-output.go) - Branded output rendering
+- [`bridge_demo.go`](examples/extensions/bridge_demo.go) - Bridged SDK API demo (tree navigation, skills, templates, model resolution)
+- [`compact-notify.go`](examples/extensions/compact-notify.go) - Notification on compaction
+- [`confirm-destructive.go`](examples/extensions/confirm-destructive.go) - Confirm destructive operations
+- [`context-inject.go`](examples/extensions/context-inject.go) - Inject context into conversations
+- [`conversation-manager.go`](examples/extensions/conversation-manager.go) - **NEW** Tree navigation, branch summarization, and fresh context loops
+- [`custom-editor-demo.go`](examples/extensions/custom-editor-demo.go) - Vim-like modal editor
+- [`dev-reload.go`](examples/extensions/dev-reload.go) - Development live-reload
+- [`header-footer-demo.go`](examples/extensions/header-footer-demo.go) - Custom headers and footers
+- [`inline-bash.go`](examples/extensions/inline-bash.go) - Inline bash execution
+- [`interactive-shell.go`](examples/extensions/interactive-shell.go) - Interactive shell integration
+- [`kit-kit.go`](examples/extensions/kit-kit.go) - Kit-in-Kit (sub-agent spawning)
+- [`lsp-diagnostics.go`](examples/extensions/lsp-diagnostics.go) - LSP diagnostic integration
+- [`notify.go`](examples/extensions/notify.go) - Desktop notifications
+- [`overlay-demo.go`](examples/extensions/overlay-demo.go) - Modal dialogs
+- [`permission-gate.go`](examples/extensions/permission-gate.go) - Permission gating for tools
+- [`pirate.go`](examples/extensions/pirate.go) - Pirate-themed personality
+- [`plan-mode.go`](examples/extensions/plan-mode.go) - Read-only planning mode
+- [`project-rules.go`](examples/extensions/project-rules.go) - Project-specific rules
+- [`prompt-demo.go`](examples/extensions/prompt-demo.go) - Interactive prompts (select/confirm/input)
+- [`prompt-templates.go`](examples/extensions/prompt-templates.go) - **NEW** Frontmatter-driven templates with model switching and skill injection
+- [`protected-paths.go`](examples/extensions/protected-paths.go) - Path protection for sensitive files
+- [`subagent-widget.go`](examples/extensions/subagent-widget.go) - Multi-agent orchestration with status widget
+- [`subagent-test.go`](examples/extensions/subagent-test.go) - Subagent testing utilities
+- [`summarize.go`](examples/extensions/summarize.go) - Conversation summarization
+- [`tool-logger.go`](examples/extensions/tool-logger.go) - Log all tool calls
+- [`neon-theme.go`](examples/extensions/neon-theme.go) - Custom theme registration and switching
+- [`tool-renderer-demo.go`](examples/extensions/tool-renderer-demo.go) - Custom tool call rendering
+- [`usage-budget.go`](examples/extensions/usage-budget.go) - Per-call usage callback (`OnLLMUsage`), session state, and enriched `OnAgentEnd` per-turn report
+- [`widget-status.go`](examples/extensions/widget-status.go) - Persistent status widgets

-Also see `.kit/extensions/go-edit-lint.go` (in this repo) for a project-local extension example that runs gopls and golangci-lint on Go file edits.
+Also see [`.kit/extensions/go-edit-lint.go`](.kit/extensions/go-edit-lint.go) (in this repo) for a project-local extension example that runs gopls and golangci-lint on Go file edits.

 ### Loading Extensions

@@ -406,7 +457,7 @@ func TestMyExtension(t *testing.T) {
 - `AssertPrinted()`, `AssertPrintedContains()` — Verify output
 - `AssertToolRegistered()`, `AssertCommandRegistered()` — Verify registration

-See `examples/extensions/tool-logger_test.go` for a complete example with 14 test cases covering tool calls, input handling, and session lifecycle.
+See [`examples/extensions/tool-logger_test.go`](examples/extensions/tool-logger_test.go) for a complete example with 14 test cases covering tool calls, input handling, and session lifecycle.

 ### Prompt Templates

@@ -428,12 +479,57 @@ Focus on $1 specifically.

 **Argument placeholders:**
 - `$1`, `$2`, etc. — Individual arguments
- `$@` or `$ARGUMENTS` — All arguments
+- `$@` or `$ARGUMENTS` — All arguments (zero or more)
+- `$+` — All arguments (one or more required; error if none given)
 - `${@:2}` — Arguments from position 2 onwards
 - `${@:1:3}` — 3 arguments starting at position 1

+Placeholders inside fenced code blocks (```) and inline code spans are ignored.
+
 Disable templates with `--no-prompt-templates` or load a specific template with `--prompt-template <name>`.

+## GitHub Integration
+
+Kit can run as an automated collaborator/reviewer inside GitHub Actions. The
+`kit github install` command scaffolds a workflow that triggers when someone
+comments `/kit ...` on an issue, pull request, or pull request review thread, runs the agent
+non-interactively in the runner, and lets it respond.
+
+```bash
+kit github install
+```
+
+This writes `.github/workflows/kit.yml`. By default the command prompts for the
+model (pre-filled with a sensible default); pass `--model` to skip the prompt.
+If the [`gh` CLI](https://cli.github.com/) is detected on your `PATH` and the
+provider API key is present in your environment, you'll be offered the option to
+store it as a repository secret automatically.
+
+The generated workflow:
+
+- Triggers only on `issue_comment` and `pull_request_review_comment` (`types: [created]`).
+- Runs only when the comment begins with the `/kit` command token.
+- Restricts triggers to repository owners, members, and collaborators (via `author_association`).
+- Uses least-privilege `permissions` and `persist-credentials: false`.
+- Authenticates git/PR operations with the built-in `secrets.GITHUB_TOKEN` and
+  the provider via a repository secret (e.g. `ANTHROPIC_API_KEY`).
+
+After committing the workflow and setting the provider secret, comment
+`/kit <your request>` on any issue or pull request to trigger Kit.
+
+The generated workflow uses the bundled [`mark3labs/kit`](action.yml) composite
+action, which installs the Kit binary and runs `kit github run`. That command
+reads the triggering event, enforces permissions, reacts with an emoji, runs the
+agent against the issue thread or pull request, posts the response as a comment,
+and — if the agent changed files — pushes a `kit-agent[bot]` branch and opens a
+pull request.
+
+| Flag | Description |
+| --- | --- |
+| `--model` | Provider/model to write into the workflow |
+| `--force` | Overwrite an existing workflow file |
+| `--no-secret` | Skip the offer to set the provider secret via the `gh` CLI |
+
 ## Session Management

 Kit uses a tree-based session model that supports branching and forking conversations.
@@ -480,6 +576,17 @@ During an interactive session, use these slash commands:
 | `/fork` | Fork to new session from an earlier message |
 | `/new` | Start a fresh session |

+### Keyboard Shortcuts
+
+| Shortcut | Description |
+|----------|-------------|
+| `Ctrl+V` | Paste an image from the clipboard — shows an inline low-res thumbnail preview (tmux/zellij-safe) |
+| `Ctrl+U` | Clear all pending image attachments |
+| `Ctrl+X e` | Open `$VISUAL`/`$EDITOR` to compose or edit your prompt |
+| `Ctrl+X s` | Steer — inject a system-level instruction mid-turn |
+| `ESC ESC` | Cancel the current operation (tool call or streaming) |
+| `↑` / `↓` | Navigate prompt history |
+
 ## Go SDK

 Embed Kit in your Go applications:
@@ -522,16 +629,37 @@ host, err := kit.New(ctx, &kit.Options{
    SystemPrompt: "You are a helpful bot",
    ConfigFile:   "/path/to/config.yml",
    MaxSteps:     10,
-    Streaming:    true,
+    Streaming:    ptr(true), // *bool: nil = unset (default true), ptr(false) = off
    Quiet:        true,

+    // Generation parameters (override env/config/per-model defaults)
+    MaxTokens:        16384,             // 0 = auto-resolve (env → config → per-model → 8192 floor)
+    ThinkingLevel:    "medium",          // "off", "none", "minimal", "low", "medium", "high"
+    Temperature:      ptr(float32(0.2)), // pointer so 0.0 != unset; nil = provider default
+    TopP:             nil,                // nil = leave provider/per-model default
+    TopK:             nil,
+    FrequencyPenalty: nil,
+    PresencePenalty:  nil,
+
+    // Provider configuration (override env/config without reaching into viper)
+    ProviderAPIKey: "sk-...",                      // "" = use config / provider env var
+    ProviderURL:    "https://proxy.internal/v1",   // "" = provider default
+    TLSSkipVerify:  false,                         // only takes effect when true
+
    // Session options
    SessionPath:  "./session.jsonl",  // Open specific session
    Continue:     true,                // Resume most recent session
    NoSession:    true,                // Ephemeral mode

    // Tool options
-    ExtraTools:   []kit.Tool{...},     // Additional tools alongside defaults
+    Tools:            []kit.Tool{...},     // Replace default tool set entirely
+    ExtraTools:       []kit.Tool{...},     // Add tools alongside defaults
+    DisableCoreTools: true,                // Disable all built-in core tools; also controllable via
+                                           // --no-core-tools flag, KIT_NO_CORE_TOOLS env var,
+                                           // or no-core-tools: true in .kit.yml
+
+    // Configuration
+    SkipConfig:   true,                   // Skip .kit.yml files (viper defaults + env vars still apply)

    // Compaction
    AutoCompact:  true,                // Auto-compact near context limit
@@ -540,6 +668,151 @@ host, err := kit.New(ctx, &kit.Options{
 })
 ```

+**Generation & provider fields** (added in v0.55+) let SDK consumers configure
+Kit entirely in-code without `viper.Set()` workarounds or shipping a `.kit.yml`.
+Precedence is `Options` > `KIT_*` env vars > `.kit.yml` > per-model defaults
+(`modelSettings` / `customModels`) > provider-level defaults. Sampling params
+are pointer types so explicit `0.0` is distinguishable from "leave alone"; a
+non-zero `MaxTokens` suppresses automatic right-sizing the same way `--max-tokens`
+does on the CLI.
+
+### Functional options (`NewAgent`)
+
+For simple programmatic setups, `kit.NewAgent` offers an ergonomic
+functional-options front door over `kit.New`. Streaming is **enabled by
+default**; pass `kit.WithStreaming(false)` to opt out.
+
+```go
+host, err := kit.NewAgent(ctx,
+    kit.WithModel("anthropic/claude-sonnet-4-5-20250929"),
+    kit.WithSystemPrompt("You are a helpful assistant."),
+    kit.WithMaxTokens(8192),
+    kit.WithThinkingLevel("medium"),
+    kit.Ephemeral(), // in-memory session, no persistence
+)
+```
+
+Available options: `WithModel`, `WithSystemPrompt`, `WithStreaming`,
+`WithMaxTokens`, `WithThinkingLevel`, `WithTools`, `WithExtraTools`,
+`WithProviderAPIKey`, `WithProviderURL`, `WithConfigFile`, `WithDebug`,
+`WithDebugLogger`, and `Ephemeral`. For advanced configuration not covered by
+the helpers (custom MCP config, in-process MCP servers, session backends, MCP
+task tuning) construct an `Options` value explicitly and call `kit.New`.
+
+### Per-instance config isolation
+
+Each `kit.New` / `kit.NewAgent` call owns an **isolated configuration store**,
+so constructing multiple Kit instances in the same process is safe: setting the
+model, thinking level, or generation parameters on one never affects another,
+and runtime mutators (`SetModel`, `SetThinkingLevel`) only touch the owning
+instance. This makes subagent spawning and multi-Kit embedding race-free with
+no external synchronization required.
+
+### MCP OAuth (remote MCP servers)
+
+When a remote MCP server returns 401, Kit runs the full OAuth flow (dynamic
+client registration → PKCE → token exchange → persistence) but delegates the
+user-facing step — showing the authorization URL and receiving the callback —
+to an `MCPAuthHandler` that you pass explicitly via `Options.MCPAuthHandler`.
+If nil, OAuth is disabled and the authorization-required error surfaces to the
+caller; the SDK never auto-opens a browser or binds a localhost port.
+
+```go
+// CLI/TUI apps: opens the system browser + prints status to stderr.
+authHandler, _ := kit.NewCLIMCPAuthHandler()
+defer authHandler.Close()
+
+host, _ := kit.New(ctx, &kit.Options{
+    MCPAuthHandler: authHandler,
+})
+
+// Custom UX: reuse the SDK's port + callback server, supply your own
+// presentation via OnAuthURL (TUI modal, QR code, web redirect, etc.).
+//   h, _ := kit.NewDefaultMCPAuthHandler()
+//   h.OnAuthURL = func(server, authURL string) { myUI.Show(server, authURL) }
+//
+// Full control (web apps, daemons): implement kit.MCPAuthHandler yourself —
+// no localhost binding, no side effects.
+```
+
+Tokens are persisted to `$XDG_CONFIG_HOME/.kit/mcp_tokens.json` by default; swap
+in a custom `MCPTokenStoreFactory` for encrypted, DB-backed, or in-memory
+storage. See the [SDK options docs](/sdk/options#mcp-oauth-authorization) for
+the full matrix.
+
+### MCP Tasks (long-running tools)
+
+Kit advertises [MCP task support](https://modelcontextprotocol.io/specification/2025-11-25/basic/utilities/tasks)
+during `initialize`, so cooperating MCP servers can respond to `tools/call`
+with a `taskId` instead of blocking the connection. Kit then polls
+`tasks/get` / `tasks/result` until the task reaches a terminal state, and
+best-effort `tasks/cancel`s on context cancellation.
+
+Defaults are safe — a server that doesn't advertise task capability runs
+synchronously, exactly as before. Opt in per server via `tasksMode` in
+`.kit.yml` (`auto` | `never` | `always`) or programmatically through the SDK:
+
+```go
+host, _ := kit.New(ctx, &kit.Options{
+    MCPTaskMode: map[string]kit.MCPTaskMode{
+        "build-server": kit.MCPTaskModeAlways,
+    },
+    MCPTaskTimeout:  15 * time.Minute,
+    MCPTaskProgress: func(p kit.MCPTaskProgress) {
+        log.Printf("%s: %s", p.TaskID, p.Status)
+    },
+})
+
+tasks, _ := host.ListMCPTasks(ctx, "build-server")
+_, _    = host.CancelMCPTask(ctx, "build-server", tasks[0].TaskID)
+```
+
+See the [configuration docs](/configuration#mcp-tasks-long-running-tools) and
+[SDK options → MCP Tasks](/sdk/options#mcp-tasks) for the full surface.
+
+### Custom Tools
+
+Create custom tools with automatic schema generation — no external dependencies needed:
+
+```go
+type SearchInput struct {
+    Query string `json:"query" description:"Search query"`
+}
+
+searchTool := kit.NewTool("search", "Search the codebase",
+    func(ctx context.Context, input SearchInput) (kit.ToolOutput, error) {
+        return kit.TextResult("Found: ..."), nil
+    },
+)
+
+host, _ := kit.New(ctx, &kit.Options{
+    ExtraTools: []kit.Tool{searchTool}, // adds alongside built-in tools
+})
+```
+
+Use `kit.NewParallelTool` for tools safe to run concurrently. Binary data (images, audio, etc.) in `ToolOutput.Data` is automatically forwarded to the LLM when `MediaType` is set. See the [SDK docs](/sdk/overview) for full details on struct tags, `ToolOutput` fields, and `ToolCallIDFromContext`.
+
+#### Return Helpers
+
+| Helper | Description |
+| --- | --- |
+| `kit.TextResult(content)` | Successful text result |
+| `kit.ErrorResult(content)` | Error result (LLM sees it as a tool error) |
+| `kit.ImageResult(content, data, mediaType)` | Image result with binary data (e.g. `"image/png"`) |
+| `kit.MediaResult(content, data, mediaType)` | Non-image media result (e.g. `"audio/mpeg"`) |
+
+#### ToolOutput Fields
+
+```go
+kit.ToolOutput{
+    Content:   "result text",     // text returned to the LLM
+    IsError:   false,             // true = LLM sees this as an error
+    Data:      pngBytes,          // optional binary data (images, audio)
+    MediaType: "image/png",       // MIME type for binary Data
+    Metadata:  map[string]any{},  // opaque metadata for hooks/UI (not sent to LLM)
+}
+```
+
 ### With Callbacks

 ```go
@@ -555,7 +828,7 @@ unsub2 := host.OnToolResult(func(e kit.ToolResultEvent) {
 })
 defer unsub2()

-unsub3 := host.OnStreaming(func(e kit.MessageUpdateEvent) {
+unsub3 := host.OnMessageUpdate(func(e kit.MessageUpdateEvent) {
    print(e.Chunk)
 })
 defer unsub3()
@@ -592,6 +865,50 @@ host, _ := kit.New(ctx, &kit.Options{
 })
 ```

+### Runtime Skills & Context Files
+
+For multi-tenant hosts (chatbots, per-user agents, web services), the SDK
+lets you swap skills and `AGENTS.md`-style context files **after** Kit
+construction. Every mutation recomposes the system prompt and applies it to
+the agent so the next turn picks up the new instructions — no restart needed.
+
+```go
+// Programmatic skill (no file on disk required).
+host.AddSkill(&kit.Skill{
+    Name:        "polite-french",
+    Description: "Respond in French and always greet the user.",
+    Content:     "Always reply in French. Open every response with 'Bonjour'.",
+})
+
+// Or load one from disk.
+host.LoadAndAddSkill("/var/skills/refund-policy.md")
+
+// Per-user AGENTS.md content pulled from a database.
+host.AddContextFileContent(
+    fmt.Sprintf("session://%s/AGENTS.md", userID),
+    rulesFromDB,
+)
+
+// Tear down session-specific state on logout.
+host.RemoveSkill("polite-french")
+host.RemoveContextFile(fmt.Sprintf("session://%s/AGENTS.md", userID))
+
+// Hide a skill from the model catalog without unloading it (still usable
+// via /skill:); EnableSkill reverses it.
+host.DisableSkill("refund-policy")
+host.EnableSkill("refund-policy")
+
+// Or replace the whole set atomically.
+host.SetSkills(activeSkillsForUser)
+host.SetContextFiles(activeContextForUser)
+```
+
+Skills dedupe by `Name`, context files dedupe by `Path` (which can be any
+opaque identifier — it doesn't have to be a real filesystem path). All
+mutators and readers (`GetSkills`, `GetContextFiles`) are safe to call
+concurrently from multiple goroutines. See the [SDK overview docs](/sdk/overview#runtime-skills-and-context-files)
+for the full reference.
+
 ## Advanced Usage

 ### Subagent Pattern
@@ -708,6 +1025,7 @@ npm/                 - NPM package wrapper for distribution

 - **Anthropic** - Claude models (native, prompt caching, OAuth)
 - **OpenAI** - GPT models
+- **Copilot** - GitHub Copilot models (`copilot`, requires active Copilot subscription)
 - **Google** - Gemini models
 - **Ollama** - Local models
 - **Azure OpenAI** - Azure-hosted OpenAI
@@ -733,6 +1051,31 @@ This automatically defaults to `custom/custom` without needing to specify a mode
 - Reasoning and temperature support
 - Optional `CUSTOM_API_KEY` environment variable or `--provider-api-key` flag

+### Auto-routed Providers
+
+Any provider in the [models.dev](https://models.dev) database can be used as
+`provider/model` without a dedicated native integration. Kit auto-routes the
+request through the matching **wire protocol** based on the provider's npm package
+(or per-model override), using its `api` URL as the base:
+
+| npm package | Wire protocol |
+|-------------|---------------|
+| `@ai-sdk/openai` | OpenAI (Responses API) |
+| `@ai-sdk/openai-compatible` | OpenAI (chat completions) |
+| `@ai-sdk/anthropic` | Anthropic |
+| `@ai-sdk/google` | Google Gemini |
+
+Providers with an `api` URL but an unrecognized npm package fall back to the
+OpenAI-compatible wire. Because routing follows the wire protocol, aggregator/proxy
+providers work across all of their models — including Claude, GPT, *and* Gemini
+routes:
+
+```bash
+kit --model opencode/claude-haiku-4-5 "Hello"     # → Anthropic wire
+kit --model opencode/gpt-5 "Hello"                # → OpenAI wire
+kit --model opencode/gemini-3.5-flash "Hello"     # → Google wire
+```
+
 ### Model String Format

 ```bash
@@ -0,0 +1,75 @@
+# Composite GitHub Action: installs a released Kit binary on the runner and
+# then invokes `kit github run`.
+name: "Kit"
+description: "Run Kit as an automated collaborator/reviewer on GitHub issues and pull requests."
+author: "mark3labs"
+branding:
+  icon: "git-merge"
+  color: "purple"
+
+inputs:
+  # Optional provider/model string. When empty, the run step passes no
+  # --model flag at all.
+  model:
+    description: "Provider/model Kit should use (e.g. anthropic/claude-sonnet-4-5-20250929). Defaults to Kit's built-in default."
+    required: false
+    default: ""
+  # Release tag to install. "latest" (or an empty value) is resolved at
+  # install time by querying the GitHub releases API for `tag_name`.
+  version:
+    description: "Kit version to install (e.g. v0.77.0). Defaults to the latest release."
+    required: false
+    default: "latest"
+
+runs:
+  using: "composite"
+  steps:
+    # Resolves the version, maps `uname -s` / `uname -m` to the OS/arch names
+    # used in the release archive file name, downloads and unpacks the
+    # tarball into a temp dir, moves the binary to $HOME/.kit/bin and appends
+    # that directory to GITHUB_PATH. The archive name uses the tag with a
+    # leading "v" stripped, while the download path uses the tag as given.
+    # Only Linux/Darwin on amd64/arm64 are accepted; anything else fails.
+    - name: Install Kit
+      shell: bash
+      env:
+        KIT_VERSION: ${{ inputs.version }}
+      run: |
+        set -euo pipefail
+
+        VERSION="${KIT_VERSION:-latest}"
+        if [ -z "$VERSION" ] || [ "$VERSION" = "latest" ]; then
+          VERSION="$(curl -fsSL https://api.github.com/repos/mark3labs/kit/releases/latest \
+            | grep -o '"tag_name": *"[^"]*"' | head -1 | cut -d'"' -f4)"
+        fi
+        if [ -z "$VERSION" ]; then
+          echo "::error::could not determine Kit version to install" >&2
+          exit 1
+        fi
+        VER="${VERSION#v}"
+
+        case "$(uname -s)" in
+          Linux)  OS=linux ;;
+          Darwin) OS=darwin ;;
+          *) echo "::error::unsupported OS $(uname -s)" >&2; exit 1 ;;
+        esac
+        case "$(uname -m)" in
+          x86_64|amd64)  ARCH=amd64 ;;
+          aarch64|arm64) ARCH=arm64 ;;
+          *) echo "::error::unsupported arch $(uname -m)" >&2; exit 1 ;;
+        esac
+
+        URL="https://github.com/mark3labs/kit/releases/download/${VERSION}/kit_${VER}_${OS}_${ARCH}.tar.gz"
+        echo "Installing Kit ${VERSION} from ${URL}"
+
+        TMP="$(mktemp -d)"
+        curl -fsSL "$URL" | tar -xz -C "$TMP"
+        mkdir -p "$HOME/.kit/bin"
+        mv "$TMP/kit" "$HOME/.kit/bin/kit"
+        chmod +x "$HOME/.kit/bin/kit"
+        echo "$HOME/.kit/bin" >> "$GITHUB_PATH"
+        rm -rf "$TMP"
+
+    # Runs the freshly installed binary by name, so this step fails if the
+    # GITHUB_PATH addition from the previous step did not take effect.
+    - name: Verify Kit
+      shell: bash
+      run: kit --version
+
+    # Builds the argument list conditionally: --model is only added when the
+    # `model` input is non-empty. The ${ARGS[@]+...} expansion yields nothing
+    # for an empty array instead of tripping `set -u`.
+    - name: Run Kit
+      shell: bash
+      env:
+        MODEL: ${{ inputs.model }}
+      run: |
+        set -euo pipefail
+        ARGS=()
+        if [ -n "${MODEL:-}" ]; then
+          ARGS+=(--model "$MODEL")
+        fi
+        kit github run ${ARGS[@]+"${ARGS[@]}"}
@@ -11,6 +11,7 @@ import (

 	"charm.land/huh/v2"
 	"github.com/mark3labs/kit/internal/auth"
+	"github.com/mark3labs/kit/internal/ui"
 	kit "github.com/mark3labs/kit/pkg/kit"
 	"github.com/spf13/cobra"
 )
@@ -30,10 +31,12 @@ using OAuth flows. Stored credentials take precedence over environment variables
 Available providers:
  - anthropic: Anthropic Claude API (OAuth)
  - openai:    OpenAI API (OAuth and API key)
+  - copilot:   GitHub Copilot (GitHub device login)

 Examples:
  kit auth login anthropic
  kit auth login openai
+  kit auth login copilot
  kit auth logout anthropic
  kit auth status`,
 }
@@ -53,10 +56,16 @@ environment variables when making API calls.
 Available providers:
  - anthropic: Anthropic Claude API (OAuth)
  - openai:    OpenAI ChatGPT Plus/Pro (Codex OAuth)
+  - copilot:   GitHub Copilot (GitHub device login, experimental)

-Example:
+Flags:
+  --set-default   Set this provider's default model as the system default
+
+Examples:
  kit auth login anthropic
-  kit auth login openai`,
+  kit auth login openai
+  kit auth login copilot
+  kit auth login copilot --set-default`,
 	Args: cobra.ExactArgs(1),
 	RunE: runAuthLogin,
 }
@@ -75,10 +84,12 @@ You will need to use environment variables or command-line flags for authenticat
 Available providers:
  - anthropic: Anthropic Claude API
  - openai:    OpenAI API
+  - copilot:   GitHub Copilot

 Example:
  kit auth logout anthropic
-  kit auth logout openai`,
+  kit auth logout openai
+  kit auth logout copilot`,
 	Args: cobra.ExactArgs(1),
 	RunE: runAuthLogout,
 }
@@ -99,12 +110,47 @@ Example:
 	RunE: runAuthStatus,
 }

+var (
+	// loginSetDefault holds the value of the --set-default flag of
+	// `kit auth login` (bound in init).
+	loginSetDefault bool
+)
+
+// defaultModels maps providers to their recommended default models.
+// These are used when --set-default flag is passed to auth login.
+// Keys are the lowercase provider names accepted by `kit auth login`;
+// values are full "provider/model" strings.
+var defaultModels = map[string]string{
+	"anthropic": "anthropic/claude-sonnet-4-5-20250929",
+	"openai":    "openai/gpt-5.4",
+	"copilot":   "copilot/gpt-5.5",
+}
+
+// setDefaultModelIfRequested looks up the recommended model for provider in
+// defaultModels and hands it to ui.SaveModelPreference, but only when the
+// --set-default flag was given. Without the flag it does nothing and
+// returns nil.
+//
+// It returns an error when provider has no entry in defaultModels or when
+// saving the preference fails. On success it prints a confirmation line.
+func setDefaultModelIfRequested(provider string) error {
+	// Nothing to do unless the user opted in with --set-default.
+	if !loginSetDefault {
+		return nil
+	}
+
+	recommended, known := defaultModels[provider]
+	if !known {
+		return fmt.Errorf("no default model configured for provider: %s", provider)
+	}
+
+	saveErr := ui.SaveModelPreference(recommended)
+	if saveErr != nil {
+		return fmt.Errorf("failed to save model preference: %w", saveErr)
+	}
+
+	fmt.Printf("\n✓ Set default model to: %s\n", recommended)
+	return nil
+}
+
+// init attaches the login, logout and status subcommands to authCmd and binds
+// the --set-default flag of `kit auth login` to loginSetDefault.
 func init() {
 	authCmd.AddCommand(authLoginCmd)
 	authCmd.AddCommand(authLogoutCmd)
 	authCmd.AddCommand(authStatusCmd)
+
+	authLoginCmd.Flags().BoolVar(&loginSetDefault, "set-default", false, "Set this provider's default model as the system default after login")
 }

+// runAuthLogin dispatches OAuth login to the selected provider.
 func runAuthLogin(cmd *cobra.Command, args []string) error {
 	provider := strings.ToLower(args[0])

@@ -113,8 +159,10 @@ func runAuthLogin(cmd *cobra.Command, args []string) error {
 		return loginAnthropic()
 	case "openai":
 		return loginOpenAI()
+	case "copilot":
+		return loginCopilot(cmd.Context())
 	default:
-		return fmt.Errorf("unsupported provider: %s. Available providers: anthropic, openai", provider)
+		return fmt.Errorf("unsupported provider: %s. Available providers: anthropic, openai, copilot", provider)
 	}
 }

@@ -126,8 +174,10 @@ func runAuthLogout(cmd *cobra.Command, args []string) error {
 		return logoutAnthropic()
 	case "openai":
 		return logoutOpenAI()
+	case "copilot":
+		return logoutCopilot()
 	default:
-		return fmt.Errorf("unsupported provider: %s. Available providers: anthropic, openai", provider)
+		return fmt.Errorf("unsupported provider: %s. Available providers: anthropic, openai, copilot", provider)
 	}
 }

@@ -206,9 +256,31 @@ func runAuthStatus(cmd *cobra.Command, args []string) error {
 		}
 	}

+	// Check GitHub Copilot credentials
+	fmt.Print("\nGitHub Copilot: ")
+	if hasCopilotCreds, err := cm.HasCopilotCredentials(); err != nil {
+		fmt.Printf("Error checking credentials: %v\n", err)
+	} else if hasCopilotCreds {
+		if creds, err := cm.GetCopilotCredentials(); err != nil {
+			fmt.Printf("Error reading credentials: %v\n", err)
+		} else {
+			status := "✓ Authenticated"
+			if creds.IsExpired() {
+				status = "⚠️  Token expired (will refresh automatically)"
+			} else if creds.NeedsRefresh() {
+				status = "⚠️  Token expires soon (will refresh automatically)"
+			}
+
+			fmt.Printf("%s (GitHub OAuth, stored %s)\n", status, creds.CreatedAt.Format("2006-01-02 15:04:05"))
+		}
+	} else {
+		fmt.Println("✗ Not authenticated")
+	}
+
 	fmt.Println("\nTo authenticate with a provider:")
 	fmt.Println("  kit auth login anthropic")
 	fmt.Println("  kit auth login openai")
+	fmt.Println("  kit auth login copilot")

 	return nil
 }
@@ -288,6 +360,17 @@ func loginAnthropic() error {
 	fmt.Println("\n🎉 Your OAuth credentials will now be used for Anthropic API calls.")
 	fmt.Println("💡 You can check your authentication status with: kit auth status")

+	// Set default model if requested
+	if err := setDefaultModelIfRequested("anthropic"); err != nil {
+		return err
+	}
+
+	// Remind users how to set this as default if they didn't use --set-default
+	if !loginSetDefault {
+		fmt.Println("\n💡 To set Anthropic as your default model, run:")
+		fmt.Println("   kit auth login anthropic --set-default")
+	}
+
 	return nil
 }

@@ -454,6 +537,96 @@ func loginOpenAI() error {
 	fmt.Println("\n🎉 Your OAuth credentials will now be used for OpenAI API calls.")
 	fmt.Println("💡 You can check your authentication status with: kit auth status")

+	// Set default model if requested
+	if err := setDefaultModelIfRequested("openai"); err != nil {
+		return err
+	}
+
+	// Remind users how to set this as default if they didn't use --set-default
+	if !loginSetDefault {
+		fmt.Println("\n💡 To set OpenAI as your default model, run:")
+		fmt.Println("   kit auth login openai --set-default")
+	}
+
+	return nil
+}
+
+// loginCopilot authenticates GitHub Copilot using GitHub device flow.
+//
+// Steps, in order: confirm re-authentication when credentials are already
+// stored, start the device flow and print the verification URL and user code,
+// poll until a GitHub token is returned, exchange that token for Copilot
+// credentials, and store them through the credential manager. Afterwards it
+// applies --set-default when requested, otherwise it prints a hint about the
+// flag.
+//
+// A nil ctx is replaced with context.Background(). Declining the
+// re-authentication prompt returns nil without touching stored credentials.
+// Every other failure is returned wrapped with a description of the step.
+func loginCopilot(ctx context.Context) error {
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	cm, err := kit.NewCredentialManager()
+	if err != nil {
+		return fmt.Errorf("failed to initialize credential manager: %w", err)
+	}
+
+	// An error from the credential check is not reported: the login simply
+	// proceeds as if nothing were stored.
+	if hasAuth, err := cm.HasCopilotCredentials(); err == nil && hasAuth {
+		var reauth bool
+		err := huh.NewConfirm().
+			Title("You are already authenticated with GitHub Copilot").
+			Description("Do you want to re-authenticate?").
+			Affirmative("Yes").
+			Negative("No").
+			Value(&reauth).
+			Run()
+		if err != nil {
+			return fmt.Errorf("failed to prompt for re-authentication: %w", err)
+		}
+		if !reauth {
+			fmt.Println("Authentication cancelled.")
+			return nil
+		}
+	}
+
+	client := auth.NewCopilotOAuthClient()
+
+	fmt.Println("🔐 Starting GitHub Copilot authentication...")
+	fmt.Println("This uses GitHub device login and requires an active GitHub Copilot subscription.")
+	fmt.Println("Experimental: this uses VS Code Copilot Chat client identifiers.")
+	fmt.Println()
+
+	deviceCode, err := client.StartDeviceFlow(ctx)
+	if err != nil {
+		return fmt.Errorf("failed to start GitHub device login: %w", err)
+	}
+
+	// The URL and code are always printed so the user can complete the flow
+	// manually; TryOpenBrowser's outcome is not checked here.
+	fmt.Println("📱 Open this page and enter the code:")
+	fmt.Printf("\n%s\n\n", deviceCode.VerificationURI)
+	fmt.Printf("Code: %s\n\n", deviceCode.UserCode)
+	auth.TryOpenBrowser(deviceCode.VerificationURI)
+
+	fmt.Println("Waiting for GitHub authorization...")
+	githubToken, err := client.PollDeviceToken(ctx, deviceCode)
+	if err != nil {
+		return fmt.Errorf("failed to complete GitHub device login: %w", err)
+	}
+
+	fmt.Println("\n🔄 Exchanging GitHub token for Copilot access token...")
+	creds, err := client.ExchangeGitHubToken(ctx, githubToken)
+	if err != nil {
+		return fmt.Errorf("failed to get GitHub Copilot token: %w", err)
+	}
+
+	if err := cm.SetCopilotOAuthCredentials(creds); err != nil {
+		return fmt.Errorf("failed to store credentials: %w", err)
+	}
+
+	fmt.Println("✅ Successfully authenticated with GitHub Copilot!")
+	fmt.Printf("📁 Credentials stored in: %s\n", cm.GetCredentialsPath())
+	fmt.Println("\n🎉 Your GitHub Copilot credentials will now be used for copilot/* models.")
+	fmt.Println("💡 You can check your authentication status with: kit auth status")
+
+	// No-op unless --set-default was passed; an error here fails the command
+	// even though the credentials above were already stored.
+	if err := setDefaultModelIfRequested("copilot"); err != nil {
+		return err
+	}
+
+	if !loginSetDefault {
+		fmt.Println("\n💡 To set Copilot as your default model, run:")
+		fmt.Println("   kit auth login copilot --set-default")
+	}
+
 	return nil
 }

@@ -504,13 +677,13 @@ func startOpenAICallbackServer(expectedState string) (*callbackServer, error) {
 		}

 		// Return success page
-		w.Header().Set("Content-Type", "text/html")
+		w.Header().Set("Content-Type", "text/html; charset=utf-8")
 		w.WriteHeader(http.StatusOK)
 		_, _ = fmt.Fprintf(w, `<!DOCTYPE html>
 <html>
 <head><title>Authentication Successful</title></head>
 <body style="font-family: sans-serif; text-align: center; padding: 50px;">
-<h1>✓ Authentication Successful</h1>
+<h1>&#10003; Authentication Successful</h1>
 <p>You can close this window and return to the terminal.</p>
 </body>
 </html>`)
@@ -575,3 +748,43 @@ func logoutOpenAI() error {

 	return nil
 }
+
+// logoutCopilot deletes the locally stored GitHub Copilot credentials after an
+// interactive confirmation.
+//
+// It returns nil without removing anything when no credentials are stored,
+// when the user declines, or when the confirmation prompt itself fails (the
+// latter two both print "Logout cancelled."). Errors from the credential
+// manager are returned wrapped.
+func logoutCopilot() error {
+	manager, err := kit.NewCredentialManager()
+	if err != nil {
+		return fmt.Errorf("failed to initialize credential manager: %w", err)
+	}
+
+	authenticated, err := manager.HasCopilotCredentials()
+	switch {
+	case err != nil:
+		return fmt.Errorf("failed to check authentication status: %w", err)
+	case !authenticated:
+		fmt.Println("You are not currently authenticated with GitHub Copilot.")
+		return nil
+	}
+
+	var proceed bool
+	prompt := huh.NewConfirm().
+		Title("Remove GitHub Copilot credentials").
+		Description("Are you sure you want to remove your stored credentials?").
+		Affirmative("Yes").
+		Negative("No").
+		Value(&proceed)
+	// A prompt failure is treated the same as an explicit "No".
+	if promptErr := prompt.Run(); promptErr != nil || !proceed {
+		fmt.Println("Logout cancelled.")
+		return nil
+	}
+
+	if removeErr := manager.RemoveCopilotCredentials(); removeErr != nil {
+		return fmt.Errorf("failed to remove credentials: %w", removeErr)
+	}
+
+	fmt.Println("✓ Successfully logged out from GitHub Copilot!")
+	fmt.Println("You will need to authenticate again with 'kit auth login copilot'.")
+	fmt.Println("Tip: this removes local credentials only. Revoke the GitHub OAuth grant at https://github.com/settings/applications")
+
+	return nil
+}
@@ -0,0 +1,311 @@
+package cmd
+
+import (
+	"context"
+	"fmt"
+	"os"
+
+	"github.com/spf13/viper"
+	"golang.org/x/term"
+
+	"github.com/mark3labs/kit/internal/app"
+	"github.com/mark3labs/kit/internal/extbridge"
+	"github.com/mark3labs/kit/internal/extensions"
+	"github.com/mark3labs/kit/internal/models"
+	"github.com/mark3labs/kit/internal/ui"
+	kit "github.com/mark3labs/kit/pkg/kit"
+)
+
+// extensionContextDeps groups the runtime dependencies needed to wire up
+// an extensions.Context for the interactive TUI mode.
+//
+// As consumed by buildInteractiveExtensionContext:
+//   - ctx is passed to extbridge.BaseContext together with kitInstance.
+//   - cwd, modelName and interactive are copied onto the resulting context's
+//     CWD, Model and Interactive fields.
+//   - kitInstance and appInstance are captured by the context's callbacks and
+//     are dereferenced without nil checks.
+//   - usageTracker may be nil; session usage is then reported as the zero
+//     value.
+type extensionContextDeps struct {
+	ctx          context.Context
+	cwd          string
+	modelName    string
+	interactive  bool
+	kitInstance  *kit.Kit
+	appInstance  *app.App
+	usageTracker *ui.UsageTracker
+}
+
+// buildInteractiveExtensionContext returns an extensions.Context with every
+// field except Print / PrintInfo / PrintError populated. Callers must set
+// the three print routes appropriately for their phase (startup buffering
+// vs. live runtime routing).
+//
+// The headless half (data access, state, options, tree navigation, skills,
+// templates, model resolution, subagents) comes from extbridge.BaseContext;
+// this function overlays the TUI-specific fields and overrides SetModel /
+// ReloadExtensions with TUI-aware versions.
+//
+// All callbacks close over deps.kitInstance, deps.appInstance and
+// deps.usageTracker; only usageTracker is nil-checked (in GetSessionUsage).
+func buildInteractiveExtensionContext(deps extensionContextDeps) extensions.Context {
+	kitInstance := deps.kitInstance
+	appInstance := deps.appInstance
+	usageTracker := deps.usageTracker
+
+	ec := extbridge.BaseContext(deps.ctx, kitInstance)
+
+	ec.CWD = deps.cwd
+	ec.Model = deps.modelName
+	ec.Interactive = deps.interactive
+
+	// Messaging and lifecycle: thin forwards to the app instance.
+	ec.PrintBlock = func(opts extensions.PrintBlockOpts) {
+		appInstance.PrintBlockFromExtension(opts)
+	}
+	ec.SendMessage = func(text string) { appInstance.Run(text) }
+	ec.CancelAndSend = func(text string) { appInstance.InterruptAndSend(text) }
+	ec.Abort = func() { appInstance.Abort() }
+	ec.IsIdle = func() bool { return !appInstance.IsBusy() }
+	ec.Compact = func(cfg extensions.CompactConfig) error {
+		return appInstance.CompactAsync(cfg.CustomInstructions, cfg.OnComplete, cfg.OnError)
+	}
+	ec.SendMultimodalMessage = func(text string, files []extensions.FilePart) {
+		// Convert extension file parts to the SDK's file-part type field by
+		// field before handing them to the app.
+		parts := make([]kit.LLMFilePart, len(files))
+		for i, f := range files {
+			parts[i] = kit.LLMFilePart{
+				Filename:  f.Filename,
+				Data:      f.Data,
+				MediaType: f.MediaType,
+			}
+		}
+		appInstance.RunWithFiles(text, parts)
+	}
+	ec.NewSession = func(prompt string) error {
+		return appInstance.RequestNewSessionFromExtension(prompt)
+	}
+	ec.GetSessionUsage = func() extensions.SessionUsage {
+		// Without a tracker there is nothing to report; return zero usage.
+		if usageTracker == nil {
+			return extensions.SessionUsage{}
+		}
+		stats := usageTracker.GetSessionStats()
+		return extensions.SessionUsage{
+			TotalInputTokens:      stats.TotalInputTokens,
+			TotalOutputTokens:     stats.TotalOutputTokens,
+			TotalCacheReadTokens:  stats.TotalCacheReadTokens,
+			TotalCacheWriteTokens: stats.TotalCacheWriteTokens,
+			TotalCost:             stats.TotalCost,
+			RequestCount:          stats.RequestCount,
+		}
+	}
+	ec.Exit = func() { appInstance.QuitFromExtension() }
+
+	// TUI widgets/chrome — mutate runner state, then notify the TUI.
+	// Always use a goroutine for NotifyWidgetUpdate: prog.Send() deadlocks
+	// if called synchronously from inside BubbleTea's Update() handler.
+	// All call sites use goroutines uniformly.
+	ec.SetWidget = func(config extensions.WidgetConfig) {
+		kitInstance.Extensions().SetWidget(config)
+		go appInstance.NotifyWidgetUpdate()
+	}
+	ec.RemoveWidget = func(id string) {
+		kitInstance.Extensions().RemoveWidget(id)
+		go appInstance.NotifyWidgetUpdate()
+	}
+	ec.SetHeader = func(config extensions.HeaderFooterConfig) {
+		kitInstance.Extensions().SetHeader(config)
+		go appInstance.NotifyWidgetUpdate()
+	}
+	ec.RemoveHeader = func() {
+		kitInstance.Extensions().RemoveHeader()
+		go appInstance.NotifyWidgetUpdate()
+	}
+	ec.SetFooter = func(config extensions.HeaderFooterConfig) {
+		kitInstance.Extensions().SetFooter(config)
+		go appInstance.NotifyWidgetUpdate()
+	}
+	ec.RemoveFooter = func() {
+		kitInstance.Extensions().RemoveFooter()
+		go appInstance.NotifyWidgetUpdate()
+	}
+	ec.SetUIVisibility = func(v extensions.UIVisibility) {
+		kitInstance.Extensions().SetUIVisibility(v)
+		go appInstance.NotifyWidgetUpdate()
+	}
+	ec.SetEditor = func(config extensions.EditorConfig) {
+		kitInstance.Extensions().SetEditor(config)
+		go appInstance.NotifyWidgetUpdate()
+	}
+	ec.ResetEditor = func() {
+		kitInstance.Extensions().ResetEditor()
+		go appInstance.NotifyWidgetUpdate()
+	}
+	// Unlike the setters around it, SetEditorText goes straight to the app
+	// and does not trigger NotifyWidgetUpdate itself.
+	ec.SetEditorText = func(text string) {
+		appInstance.SetEditorTextFromExtension(text)
+	}
+	ec.SetStatus = func(key string, text string, priority int) {
+		kitInstance.Extensions().SetStatus(extensions.StatusBarEntry{
+			Key:      key,
+			Text:     text,
+			Priority: priority,
+		})
+		go appInstance.NotifyWidgetUpdate()
+	}
+	ec.RemoveStatus = func(key string) {
+		kitInstance.Extensions().RemoveStatus(key)
+		go appInstance.NotifyWidgetUpdate()
+	}
+
+	// Interactive prompts — channel-based round trips through the TUI.
+	// Each prompt creates a 1-buffered response channel, sends the request,
+	// and then blocks the calling goroutine until a response is received.
+	ec.PromptSelect = func(config extensions.PromptSelectConfig) extensions.PromptSelectResult {
+		ch := make(chan app.PromptResponse, 1)
+		appInstance.SendPromptRequest(app.PromptRequestEvent{
+			PromptType: "select",
+			Message:    config.Message,
+			Options:    config.Options,
+			ResponseCh: ch,
+		})
+		resp := <-ch
+		if resp.Cancelled {
+			return extensions.PromptSelectResult{Cancelled: true}
+		}
+		return extensions.PromptSelectResult{Value: resp.Value, Index: resp.Index}
+	}
+	ec.PromptConfirm = func(config extensions.PromptConfirmConfig) extensions.PromptConfirmResult {
+		ch := make(chan app.PromptResponse, 1)
+		// The request carries the default as a string, so the boolean
+		// default is encoded as "true"/"false".
+		def := "false"
+		if config.DefaultValue {
+			def = "true"
+		}
+		appInstance.SendPromptRequest(app.PromptRequestEvent{
+			PromptType: "confirm",
+			Message:    config.Message,
+			Default:    def,
+			ResponseCh: ch,
+		})
+		resp := <-ch
+		if resp.Cancelled {
+			return extensions.PromptConfirmResult{Cancelled: true}
+		}
+		return extensions.PromptConfirmResult{Value: resp.Confirmed}
+	}
+	ec.PromptInput = func(config extensions.PromptInputConfig) extensions.PromptInputResult {
+		ch := make(chan app.PromptResponse, 1)
+		appInstance.SendPromptRequest(app.PromptRequestEvent{
+			PromptType:  "input",
+			Message:     config.Message,
+			Placeholder: config.Placeholder,
+			Default:     config.Default,
+			ResponseCh:  ch,
+		})
+		resp := <-ch
+		if resp.Cancelled {
+			return extensions.PromptInputResult{Cancelled: true}
+		}
+		return extensions.PromptInputResult{Value: resp.Value}
+	}
+	ec.ShowOverlay = func(config extensions.OverlayConfig) extensions.OverlayResult {
+		ch := make(chan app.OverlayResponse, 1)
+		appInstance.SendOverlayRequest(app.OverlayRequestEvent{
+			Title:       config.Title,
+			Content:     config.Content.Text,
+			Markdown:    config.Content.Markdown,
+			BorderColor: config.Style.BorderColor,
+			Background:  config.Style.Background,
+			Width:       config.Width,
+			MaxHeight:   config.MaxHeight,
+			Anchor:      string(config.Anchor),
+			Actions:     config.Actions,
+			ResponseCh:  ch,
+		})
+		resp := <-ch
+		// A cancelled overlay reports Index -1 rather than the zero value.
+		if resp.Cancelled {
+			return extensions.OverlayResult{Cancelled: true, Index: -1}
+		}
+		return extensions.OverlayResult{
+			Action: resp.Action,
+			Index:  resp.Index,
+		}
+	}
+	ec.SuspendTUI = func(callback func()) error {
+		return appInstance.SuspendTUI(callback)
+	}
+
+	// TUI-aware model switch: also notifies the TUI status bar and
+	// refreshes the usage tracker for correct token counting.
+	// Note that the switch runs with context.Background(), not deps.ctx.
+	ec.SetModel = func(modelString string) error {
+		// Capture previous model for the ModelChange event.
+		previousModel := kitInstance.Extensions().GetContext().Model
+		err := kitInstance.SetModel(context.Background(), modelString)
+		if err != nil {
+			return err
+		}
+		// Notify TUI so it updates model in status bar.
+		// The parse error is discarded; presumably SetModel above has
+		// already accepted the same string — TODO confirm.
+		p, m, _ := models.ParseModelString(modelString)
+		appInstance.NotifyModelChanged(p, m)
+		// Update the context's Model field so handlers see it.
+		kitInstance.Extensions().UpdateContextModel(modelString)
+		// Fire OnModelChange event to extensions.
+		kitInstance.Extensions().EmitModelChange(modelString, previousModel, "extension")
+		// Update usage tracker with new model info for correct token counting.
+		ui.UpdateUsageTrackerForModel(usageTracker, modelString, viper.GetString("provider-api-key"))
+		return nil
+	}
+
+	ec.RenderMessage = func(rendererName, content string) {
+		// Unknown renderer (or one without a Render func): print the raw
+		// content unchanged.
+		renderer := kitInstance.Extensions().GetMessageRenderer(rendererName)
+		if renderer == nil || renderer.Render == nil {
+			appInstance.PrintFromExtension("", content)
+			return
+		}
+		// The size lookup error is ignored; a zero width falls back to 80
+		// columns.
+		w, _, _ := term.GetSize(int(os.Stdout.Fd()))
+		if w == 0 {
+			w = 80
+		}
+		rendered := renderer.Render(content, w)
+		appInstance.PrintFromExtension("", rendered)
+	}
+	ec.ReloadExtensions = func() error {
+		err := kitInstance.Extensions().Reload()
+		if err != nil {
+			return err
+		}
+		// Notify TUI that widgets/status/commands may have changed.
+		go appInstance.NotifyWidgetUpdate()
+		return nil
+	}
+
+	// Theme management (TUI only).
+	ec.RegisterTheme = func(name string, config extensions.ThemeColorConfig) {
+		// tc flattens a ThemeColor into a {light, dark} pair, the form
+		// passed positionally to ui.RegisterThemeFromConfig below.
+		tc := func(c extensions.ThemeColor) [2]string { return [2]string{c.Light, c.Dark} }
+		ui.RegisterThemeFromConfig(name,
+			tc(config.Primary), tc(config.Secondary),
+			tc(config.Success), tc(config.Warning),
+			tc(config.Error), tc(config.Info),
+			tc(config.Text), tc(config.Muted),
+			tc(config.VeryMuted), tc(config.Background),
+			tc(config.Border), tc(config.MutedBorder),
+			tc(config.System), tc(config.Tool),
+			tc(config.Accent), tc(config.Highlight),
+			tc(config.MdHeading), tc(config.MdLink),
+			tc(config.MdKeyword), tc(config.MdString),
+			tc(config.MdNumber), tc(config.MdComment),
+		)
+	}
+	ec.SetTheme = func(name string) error {
+		return ui.ApplyTheme(name)
+	}
+	ec.ListThemes = func() []string {
+		return ui.ListThemes()
+	}
+
+	// Skill context-injection (drives a new agent turn through the TUI).
+	// Both callbacks return "" on success and an error message otherwise.
+	ec.InjectSkillAsContext = func(skillName string) string {
+		skills := kitInstance.DiscoverSkillsForExtension()
+		for _, s := range skills {
+			if s.Name == skillName {
+				appInstance.Run(fmt.Sprintf("<skill name=%q>\n%s\n</skill>", s.Name, s.Content))
+				return ""
+			}
+		}
+		return fmt.Sprintf("skill not found: %s", skillName)
+	}
+	ec.InjectRawSkillAsContext = func(path string) string {
+		// LoadSkillForExtension reports failure as a non-empty string, not
+		// as an error value.
+		s, err := kitInstance.LoadSkillForExtension(path)
+		if err != "" {
+			return err
+		}
+		appInstance.Run(fmt.Sprintf("<skill name=%q>\n%s\n</skill>", s.Name, s.Content))
+		return ""
+	}
+
+	return ec
+}
@@ -0,0 +1,255 @@
+package cmd
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+
+	"charm.land/huh/v2"
+	"github.com/charmbracelet/log"
+	kit "github.com/mark3labs/kit/pkg/kit"
+	"github.com/spf13/cobra"
+)
+
+// defaultGitHubModel is the model written into the generated workflow when the
+// user does not specify one and runs non-interactively.
+const defaultGitHubModel = "anthropic/claude-sonnet-4-5-20250929"
+
+// githubWorkflowPath is the repository-relative location of the generated
+// GitHub Actions workflow that wires Kit into a repository as a collaborator.
+const githubWorkflowPath = ".github/workflows/kit.yml"
+
+// Flag values for `kit github install`, bound in init:
+// --model/-m, --force and --no-secret respectively.
+var (
+	githubInstallModel    string
+	githubInstallForce    bool
+	githubInstallNoSecret bool
+)
+
+// githubCmd is the parent command for GitHub integration subcommands. It groups
+// the turnkey setup tooling that wires Kit into a repository as an automated
+// collaborator/reviewer driven by GitHub Actions.
+var githubCmd = &cobra.Command{
+	Use:   "github",
+	Short: "Set up Kit as a GitHub collaborator/reviewer",
+	Long: `Set up Kit as an automated collaborator/reviewer in a GitHub repository.
+
+Kit runs inside a GitHub Actions runner, reads the relevant context (an issue
+thread or pull request), runs the agent non-interactively, and responds by
+posting comments and opening pull requests.
+
+Use 'kit github install' to scaffold the GitHub Actions workflow.`,
+}
+
+// githubInstallCmd scaffolds the GitHub Actions workflow that runs Kit on
+// '/kit' comment triggers. It writes .github/workflows/kit.yml and, when the
+// 'gh' CLI is available, offers to set the provider API key as a repository
+// secret.
+var githubInstallCmd = &cobra.Command{
+	Use:   "install",
+	Short: "Scaffold the GitHub Actions workflow that runs Kit",
+	Long: `Scaffold the GitHub Actions workflow that runs Kit as a collaborator.
+
+This writes .github/workflows/kit.yml configured to trigger when someone
+comments '/kit ...' on an issue or pull request review. The workflow runs Kit
+inside an ephemeral Actions runner with least-privilege permissions and
+'persist-credentials: false', mirroring established security practice.
+
+If the GitHub CLI ('gh') is detected on your PATH, you will be offered the
+option to store your provider API key as a repository secret automatically.
+
+Flags:
+  --model       Provider/model to write into the workflow (e.g. anthropic/claude-sonnet-4-5)
+  --force       Overwrite an existing workflow file
+  --no-secret   Skip the offer to set the provider secret via the gh CLI
+
+Examples:
+  kit github install
+  kit github install --model anthropic/claude-sonnet-4-5-20250929
+  kit github install --force --no-secret`,
+	Args: cobra.NoArgs,
+	RunE: runGitHubInstall,
+}
+
+// init binds the --model/-m, --force and --no-secret flags of
+// `kit github install`, then registers install under githubCmd and githubCmd
+// under rootCmd.
+func init() {
+	githubInstallCmd.Flags().StringVarP(&githubInstallModel, "model", "m", "", "provider/model to write into the workflow")
+	githubInstallCmd.Flags().BoolVar(&githubInstallForce, "force", false, "overwrite an existing workflow file")
+	githubInstallCmd.Flags().BoolVar(&githubInstallNoSecret, "no-secret", false, "skip setting the provider secret via the gh CLI")
+
+	githubCmd.AddCommand(githubInstallCmd)
+	rootCmd.AddCommand(githubCmd)
+}
+
+// runGitHubInstall implements `kit github install`. It resolves the model,
+// derives the provider's secret name from it, writes the workflow file,
+// offers to set the secret through gh, and prints the follow-up steps.
+//
+// Only model resolution, model parsing and the workflow write can fail the
+// command; maybeSetProviderSecret returns nothing and reports its own
+// problems on stdout.
+func runGitHubInstall(cmd *cobra.Command, _ []string) error {
+	chosenModel, err := resolveGitHubModel()
+	if err != nil {
+		return err
+	}
+
+	providerName, _, parseErr := kit.ParseModelString(chosenModel)
+	if parseErr != nil {
+		return fmt.Errorf("invalid model %q: %w", chosenModel, parseErr)
+	}
+	secret := providerSecretEnvVar(providerName)
+
+	if writeErr := writeGitHubWorkflow(chosenModel, secret, githubInstallForce); writeErr != nil {
+		return writeErr
+	}
+	fmt.Printf("✅ Wrote %s\n", githubWorkflowPath)
+
+	maybeSetProviderSecret(cmd.Context(), secret)
+	printGitHubInstallNextSteps(secret)
+
+	log.Info("github workflow scaffolded", "model", chosenModel, "secret", secret)
+	return nil
+}
+
+// resolveGitHubModel picks the model string that gets embedded in the
+// workflow. Precedence:
+//
+//  1. a non-empty --model flag (returned with surrounding whitespace trimmed),
+//  2. defaultGitHubModel when the session is not interactive,
+//  3. an interactive input prompt pre-filled with defaultGitHubModel.
+//
+// It returns an error when the prompt fails or is cancelled, or when the
+// prompted value is empty after trimming.
+func resolveGitHubModel() (string, error) {
+	switch {
+	case githubInstallModel != "":
+		return strings.TrimSpace(githubInstallModel), nil
+	case !isInteractive():
+		return defaultGitHubModel, nil
+	}
+
+	answer := defaultGitHubModel
+	prompt := huh.NewInput().
+		Title("Model").
+		Description("Provider/model Kit should use in CI (e.g. anthropic/claude-sonnet-4-5)").
+		Value(&answer)
+	if err := prompt.Run(); err != nil {
+		return "", fmt.Errorf("model selection cancelled: %w", err)
+	}
+
+	if answer = strings.TrimSpace(answer); answer == "" {
+		return "", fmt.Errorf("model cannot be empty")
+	}
+	return answer, nil
+}
+
+// providerSecretEnvVar maps a provider name to the environment variable /
+// repository secret that carries its API key. When kit.GetProviderInfo knows
+// the provider and lists at least one env var, the first one is used.
+// Otherwise the name is derived: '-' and '.' become '_', the result is
+// upper-cased, and "_API_KEY" is appended.
+func providerSecretEnvVar(provider string) string {
+	info := kit.GetProviderInfo(provider)
+	if info == nil || len(info.Env) == 0 {
+		// Unknown provider, or one with no declared env vars: derive a name.
+		replaced := strings.NewReplacer("-", "_", ".", "_").Replace(provider)
+		return strings.ToUpper(replaced) + "_API_KEY"
+	}
+	return info.Env[0]
+}
+
+// renderGitHubWorkflow builds the workflow YAML for the given model and
+// provider secret name.
+//
+// The generated workflow triggers on newly created issue comments and pull
+// request review comments. Its single job only runs when the comment author's
+// association is OWNER, MEMBER or COLLABORATOR and the comment body is exactly
+// "/kit" or starts with "/kit ". The job checks out the repository with
+// persist-credentials disabled and then runs the mark3labs/kit@v0 action with
+// GITHUB_TOKEN and the provider secret exported as environment variables.
+//
+// model is substituted once (the action's `model` input) and secretName twice
+// (the env var name and the secrets lookup key). Both are inserted verbatim,
+// with no YAML quoting or escaping.
+// NOTE(review): a value containing YAML-significant characters would yield an
+// invalid workflow — confirm callers only pass plain identifiers.
+func renderGitHubWorkflow(model, secretName string) string {
+	return fmt.Sprintf(`name: kit
+on:
+  issue_comment:
+    types: [created]
+  pull_request_review_comment:
+    types: [created]
+jobs:
+  kit:
+    if: |
+      (github.event.comment.author_association == 'OWNER' ||
+       github.event.comment.author_association == 'MEMBER' ||
+       github.event.comment.author_association == 'COLLABORATOR') &&
+      (startsWith(github.event.comment.body, '/kit ') ||
+       github.event.comment.body == '/kit')
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+      - uses: mark3labs/kit@v0
+        with:
+          model: %s
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          %s: ${{ secrets.%s }}
+`, model, secretName, secretName)
+}
+
+// writeGitHubWorkflow renders the workflow for model and secretName and
+// writes it to githubWorkflowPath (relative to the current working
+// directory), creating missing parent directories first.
+//
+// An existing file is only replaced when force is true. Any stat failure
+// other than "does not exist" is returned as an error, even with force set.
+func writeGitHubWorkflow(model, secretName string, force bool) error {
+	_, statErr := os.Stat(githubWorkflowPath)
+	switch {
+	case statErr == nil && !force:
+		return fmt.Errorf("%s already exists; re-run with --force to overwrite", githubWorkflowPath)
+	case statErr != nil && !os.IsNotExist(statErr):
+		return fmt.Errorf("checking %s: %w", githubWorkflowPath, statErr)
+	}
+
+	dir := filepath.Dir(githubWorkflowPath)
+	if err := os.MkdirAll(dir, 0o755); err != nil {
+		return fmt.Errorf("creating %s: %w", dir, err)
+	}
+
+	rendered := []byte(renderGitHubWorkflow(model, secretName))
+	if err := os.WriteFile(githubWorkflowPath, rendered, 0o644); err != nil {
+		return fmt.Errorf("writing %s: %w", githubWorkflowPath, err)
+	}
+	return nil
+}
+
+// maybeSetProviderSecret offers to set the provider API key as a repository
+// secret via the gh CLI when it is available, interactive, the secret value is
+// present in the environment, and the user did not pass --no-secret.
+func maybeSetProviderSecret(ctx context.Context, secretName string) {
+	if githubInstallNoSecret || !isInteractive() {
+		return
+	}
+
+	if _, err := exec.LookPath("gh"); err != nil {
+		return
+	}
+
+	value := os.Getenv(secretName)
+	if value == "" {
+		fmt.Printf("ℹ️  %s is not set in your environment; set the repository secret manually with:\n", secretName)
+		fmt.Printf("     gh secret set %s\n", secretName)
+		return
+	}
+
+	var confirm bool
+	if err := huh.NewConfirm().
+		Title(fmt.Sprintf("Set the %s repository secret via gh?", secretName)).
+		Description("Uses the value from your current environment.").
+		Value(&confirm).
+		Run(); err != nil || !confirm {
+		return
+	}
+
+	// Feed the secret value via stdin rather than a command-line argument so
+	// the API key never appears in the process argument list.
+	cmd := exec.CommandContext(ctx, "gh", "secret", "set", secretName)
+	cmd.Stdin = strings.NewReader(value)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		fmt.Printf("⚠️  Failed to set secret via gh: %v\n", err)
+		fmt.Printf("     Set it manually with: gh secret set %s\n", secretName)
+		return
+	}
+	fmt.Printf("✅ Set repository secret %s\n", secretName)
+}
+
+// printGitHubInstallNextSteps writes the three manual follow-up actions to
+// stdout: commit the workflow file, make sure the secretName repository
+// secret exists, and trigger Kit with a '/kit' comment.
+func printGitHubInstallNextSteps(secretName string) {
+	fmt.Println("\nNext steps:")
+
+	commitStep := fmt.Sprintf("  1. Commit the workflow:  git add %s && git commit -m \"ci: add kit workflow\"\n", githubWorkflowPath)
+	secretStep := fmt.Sprintf("  2. Set the %s repository secret (Settings → Secrets → Actions), if not already set.\n", secretName)
+	// Both operands are strings, so fmt.Print inserts no separator.
+	fmt.Print(commitStep, secretStep)
+
+	fmt.Println("  3. Comment '/kit <your request>' on an issue or pull request to trigger Kit.")
+}
@@ -0,0 +1,498 @@
+package cmd
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"time"
+
+	"github.com/charmbracelet/log"
+	"github.com/spf13/cobra"
+)
+
+// commandToken is the mention that triggers Kit from a comment, mirroring the
+// `if:` guard in the generated workflow (.github/workflows/kit.yml).
+// extractRequest matches it against each line of the comment body.
+const commandToken = "/kit"
+
+// subprocessTimeout bounds each git/gh invocation so a stalled network call or
+// an unexpected auth prompt cannot hang the Actions job indefinitely. It is
+// applied per call by gitOutput, ghOutput, and runCmd.
+const subprocessTimeout = 30 * time.Second
+
+// agentTimeout bounds the headless agent run so a runaway turn cannot block the
+// job forever. GitHub Actions jobs have their own ceiling, but a tighter bound
+// keeps feedback fast and costs predictable.
+const agentTimeout = 20 * time.Minute
+
+// botName / botEmail are the dedicated identity commits are attributed to, so
+// Kit's changes are clearly distinguishable from human authors in history.
+// openPullRequest passes them to git as per-command user.name / user.email.
+const (
+	botName  = "kit-agent[bot]"
+	botEmail = "kit-agent[bot]@users.noreply.github.com"
+)
+
+// writeAssociations are the GitHub author_association values that imply
+// write/admin access. Only these may trigger the handler. runGitHubRun
+// upper-cases the event's value before the lookup, so keys are upper-case.
+var writeAssociations = map[string]bool{
+	"OWNER":        true,
+	"MEMBER":       true,
+	"COLLABORATOR": true,
+}
+
+// Flag-backed settings for `kit github run`; init binds them to the command.
+var (
+	githubRunModel  string // --model / -m; empty defers to $MODEL, then the default
+	githubRunDryRun bool   // --dry-run; log git/gh side effects and skip the agent run
+)
+
+// githubRunCmd is the runtime half of the GitHub integration. It is invoked by
+// the bundled composite action (action.yml) inside a GitHub Actions runner once
+// a collaborator comments '/kit <request>' on an issue or pull request. It reads
+// the triggering event, enforces permissions, runs the agent headlessly against
+// the comment/PR context, and responds by posting a comment and — when the agent
+// leaves changes — opening a pull request.
+//
+// The command takes no positional arguments; all work is done by runGitHubRun.
+var githubRunCmd = &cobra.Command{
+	Use:   "run",
+	Short: "Run Kit against the current GitHub Actions event (used by the kit action)",
+	Long: `Run Kit against the current GitHub Actions event.
+
+This command is normally invoked by the bundled composite action inside a
+GitHub Actions runner; you rarely run it by hand. It reads the triggering
+event from GITHUB_EVENT_PATH, verifies the commenter has write/admin access,
+reacts with an emoji while it works, runs the agent non-interactively against
+the issue thread or pull request, posts the response as a comment, and — if the
+agent modified files — pushes a kit-agent[bot] branch and opens a pull request.
+
+Set --dry-run (or KIT_GITHUB_DRY_RUN=1) to log every git/gh side effect and
+skip the agent run instead of executing them.`,
+	// The request text comes from the event payload, never from argv.
+	Args: cobra.NoArgs,
+	RunE: runGitHubRun,
+}
+
+// init binds the --model and --dry-run flags to their package-level variables
+// and registers githubRunCmd as a subcommand of githubCmd.
+func init() {
+	flags := githubRunCmd.Flags()
+	flags.StringVarP(&githubRunModel, "model", "m", "", "provider/model the agent should use (falls back to $MODEL, then a default)")
+	flags.BoolVar(&githubRunDryRun, "dry-run", false, "log git/gh side effects and skip the agent run instead of executing them")
+
+	githubCmd.AddCommand(githubRunCmd)
+}
+
+// --- GitHub event types ------------------------------------------------------
+
+// ghUser is the subset of a GitHub user object the handler reads.
+type ghUser struct {
+	Login string `json:"login"`
+}
+
+// ghComment is the triggering comment from the event payload.
+type ghComment struct {
+	ID                int64  `json:"id"`
+	Body              string `json:"body"`
+	AuthorAssociation string `json:"author_association"`
+	User              ghUser `json:"user"`
+}
+
+// ghIssue is the "issue" object of an event. PullRequest is kept as raw JSON
+// because buildTrigger only inspects whether it is present, not its contents.
+type ghIssue struct {
+	Number      int             `json:"number"`
+	Title       string          `json:"title"`
+	Body        string          `json:"body"`
+	PullRequest json.RawMessage `json:"pull_request"`
+}
+
+// ghPull is the top-level "pull_request" object of an event.
+type ghPull struct {
+	Number int    `json:"number"`
+	Title  string `json:"title"`
+	Body   string `json:"body"`
+}
+
+// ghRepo is the subset of the "repository" object the handler reads.
+type ghRepo struct {
+	FullName      string `json:"full_name"`
+	DefaultBranch string `json:"default_branch"`
+}
+
+// ghEvent is the decoded event payload. Comment, Issue, and PullRequest are
+// pointers so buildTrigger can tell an absent object from an empty one.
+type ghEvent struct {
+	Action      string     `json:"action"`
+	Comment     *ghComment `json:"comment"`
+	Issue       *ghIssue   `json:"issue"`
+	PullRequest *ghPull    `json:"pull_request"`
+	Repository  ghRepo     `json:"repository"`
+}
+
+// trigger normalises a single invocation across issue_comment and
+// pull_request_review_comment events. buildTrigger populates it from the
+// decoded event payload.
+type trigger struct {
+	repo          string // repository.full_name from the event
+	defaultBranch string // repository.default_branch; buildTrigger falls back to "main" when empty
+	number        int    // issue or PR number
+	isPR          bool   // true when the target is a pull request
+	commentID     int64  // triggering comment id (for reactions)
+	commentKind   string // "issues" or "pulls" — reaction API path segment
+	author        string // login of the commenting user
+	association   string // the comment's author_association value
+	request       string // the user's instruction (comment body minus the token)
+	title         string // title of the issue or pull request
+	body          string // body of the issue or pull request
+}
+
+// runGitHubRun is the entry point wired to `kit github run`.
+//
+// It refuses to run outside GitHub Actions unless dry-run is enabled, loads the
+// event payload, and returns nil without acting for events that are not
+// actionable `/kit` comments or whose author lacks a write-level association.
+// Otherwise it reacts to the comment, gathers context, runs the agent, opens a
+// pull request when the working tree is dirty, and posts the response back.
+//
+// It returns an error only when the environment check fails, the event cannot
+// be loaded, or the agent run fails.
+func runGitHubRun(cmd *cobra.Command, _ []string) error {
+	// cmd.Context() is nil when the handler is called directly instead of
+	// through cobra's Execute path (as the package tests do). The subprocess
+	// helpers derive timeouts from this context and context.WithTimeout panics
+	// on a nil parent, so fall back to a background context.
+	ctx := cmd.Context()
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	if !inGitHubActions() && !githubDryRun() {
+		return fmt.Errorf("kit github run is meant to run inside GitHub Actions (set GITHUB_ACTIONS=true or pass --dry-run)")
+	}
+
+	event, err := loadGitHubEvent()
+	if err != nil {
+		return err
+	}
+
+	tr, err := buildTrigger(event)
+	if err != nil {
+		// Not an actionable trigger (the workflow `if:` normally prevents this).
+		log.Info("github run: nothing to do", "reason", err)
+		return nil
+	}
+
+	// Permission gate: only write-level associations may drive the agent.
+	if !writeAssociations[strings.ToUpper(tr.association)] {
+		log.Warn("github run: ignoring /kit from unauthorized author",
+			"author", tr.author, "association", tr.association)
+		return nil
+	}
+
+	model := resolveRunModel()
+	log.Info("github run: handling trigger",
+		"repo", tr.repo, "number", tr.number, "pr", tr.isPR, "author", tr.author, "model", model)
+
+	// React with 👀 so the human sees Kit picked up the request.
+	addReaction(ctx, tr, "eyes")
+
+	gathered := gatherContext(ctx, tr)
+	prompt := buildPrompt(tr, gathered)
+
+	response, runErr := runAgent(ctx, model, prompt)
+	if runErr != nil {
+		// Report the failure on the thread, then fail the job as well.
+		postComment(ctx, tr, "⚠️ Kit hit an error while processing this request:\n\n```\n"+runErr.Error()+"\n```")
+		addReaction(ctx, tr, "confused")
+		return runErr
+	}
+
+	response = strings.TrimSpace(response)
+	if response == "" {
+		response = "Kit finished without a textual response."
+	}
+
+	// Only open a pull request when the agent actually left changes behind.
+	prURL := ""
+	if hasUncommittedChanges(ctx) {
+		prURL = openPullRequest(ctx, tr, response)
+	}
+
+	comment := response
+	if prURL != "" {
+		comment += "\n\n---\nOpened a pull request with the changes: " + prURL
+	}
+	postComment(ctx, tr, comment)
+	addReaction(ctx, tr, "rocket")
+	return nil
+}
+
+// resolveRunModel returns the first non-blank candidate, in priority order:
+// the --model flag, the MODEL environment variable, then defaultGitHubModel.
+// Candidates are whitespace-trimmed before use.
+func resolveRunModel() string {
+	for _, candidate := range []string{githubRunModel, os.Getenv("MODEL")} {
+		if trimmed := strings.TrimSpace(candidate); trimmed != "" {
+			return trimmed
+		}
+	}
+	return defaultGitHubModel
+}
+
+// inGitHubActions reports whether GITHUB_ACTIONS is set to exactly "true".
+func inGitHubActions() bool {
+	marker, _ := os.LookupEnv("GITHUB_ACTIONS")
+	return marker == "true"
+}
+
+// githubDryRun reports whether side effects should be logged instead of run.
+// Dry-run is on when --dry-run was passed or KIT_GITHUB_DRY_RUN holds any
+// non-empty value (the value itself is not interpreted).
+func githubDryRun() bool {
+	if githubRunDryRun {
+		return true
+	}
+	return os.Getenv("KIT_GITHUB_DRY_RUN") != ""
+}
+
+// loadGitHubEvent reads and decodes the GitHub Actions event payload.
+func loadGitHubEvent() (*ghEvent, error) {
+	path := os.Getenv("GITHUB_EVENT_PATH")
+	if path == "" {
+		return nil, fmt.Errorf("GITHUB_EVENT_PATH is not set")
+	}
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("reading event payload: %w", err)
+	}
+	var event ghEvent
+	if err := json.Unmarshal(data, &event); err != nil {
+		return nil, fmt.Errorf("parsing event payload: %w", err)
+	}
+	return &event, nil
+}
+
+// buildTrigger normalises an event into a trigger, or returns an error when the
+// event is not an actionable `/kit` comment.
+//
+// An error is returned when the event (or its comment) is missing, the comment
+// body does not contain the command token, the event targets neither an issue
+// nor a pull request, or repository.full_name is empty. An empty default
+// branch falls back to "main".
+func buildTrigger(event *ghEvent) (*trigger, error) {
+	if event == nil || event.Comment == nil {
+		return nil, fmt.Errorf("event has no comment; nothing to do")
+	}
+
+	request, ok := extractRequest(event.Comment.Body)
+	if !ok {
+		return nil, fmt.Errorf("comment does not contain the %q command", commandToken)
+	}
+
+	tr := &trigger{
+		repo:          event.Repository.FullName,
+		defaultBranch: event.Repository.DefaultBranch,
+		commentID:     event.Comment.ID,
+		author:        event.Comment.User.Login,
+		association:   event.Comment.AuthorAssociation,
+		request:       request,
+	}
+	if tr.defaultBranch == "" {
+		tr.defaultBranch = "main"
+	}
+
+	switch {
+	case event.Issue != nil:
+		tr.number = event.Issue.Number
+		tr.title = event.Issue.Title
+		tr.body = event.Issue.Body
+		// A comment on a pull request carries a "pull_request" object on the
+		// issue. json.RawMessage stores an explicit JSON null as the literal
+		// bytes `null`, so a bare length check would misclassify
+		// `"pull_request": null` as a pull request.
+		prRef := strings.TrimSpace(string(event.Issue.PullRequest))
+		tr.isPR = prRef != "" && prRef != "null"
+		tr.commentKind = "issues"
+	case event.PullRequest != nil:
+		tr.number = event.PullRequest.Number
+		tr.title = event.PullRequest.Title
+		tr.body = event.PullRequest.Body
+		tr.isPR = true
+		tr.commentKind = "pulls"
+	default:
+		return nil, fmt.Errorf("event has no issue or pull_request target")
+	}
+
+	if tr.repo == "" {
+		return nil, fmt.Errorf("event is missing repository.full_name")
+	}
+	return tr, nil
+}
+
+// extractRequest pulls the instruction text out of a comment body that mentions
+// the command token. It only recognizes the token at the start of a line
+// (mirroring the workflow guard) or at the very end, so incidental mid-sentence
+// mentions like "please review /kit behavior" do not trigger the handler. It
+// returns the remainder of the matching line as the request.
+func extractRequest(body string) (string, bool) {
+	for line := range strings.SplitSeq(body, "\n") {
+		trimmed := strings.TrimSpace(line)
+		var rest string
+		switch {
+		case trimmed == commandToken:
+			return "", true
+		case strings.HasPrefix(trimmed, commandToken+" "):
+			rest = trimmed[len(commandToken):]
+		case strings.HasSuffix(trimmed, " "+commandToken):
+			return "", true
+		default:
+			continue
+		}
+		return strings.TrimSpace(rest), true
+	}
+	return "", false
+}
+
+// gatherContext assembles the issue thread or PR diff to give the agent. It
+// always includes the title/body from the event payload, and — outside dry-run,
+// when `gh` is available — enriches with the PR diff (pull requests only) and
+// the comment thread.
+//
+// The gh-sourced sections are clipped so the result stays near
+// maxContextBytes; clipped sections end with a "[truncated]" marker.
+func gatherContext(ctx context.Context, tr *trigger) string {
+	// maxContextBytes bounds the gathered context. The prompt built from it is
+	// handed to the agent as a single command-line argument (see runAgent), and
+	// Linux rejects any one argument larger than 128 KiB, so an unbounded PR
+	// diff would stop the agent from starting at all. The cap leaves headroom
+	// for the request text and the prompt framing.
+	const maxContextBytes = 96 * 1024
+	// commentsJQ renders each comment as "@login: body".
+	const commentsJQ = `.comments[] | "@\(.author.login): \(.body)"`
+
+	target, sub := "Issue", "issue"
+	if tr.isPR {
+		target, sub = "Pull request", "pr"
+	}
+
+	var b strings.Builder
+	fmt.Fprintf(&b, "%s #%d: %s\n", target, tr.number, tr.title)
+	if body := strings.TrimSpace(tr.body); body != "" {
+		fmt.Fprintf(&b, "\n%s\n", body)
+	}
+
+	if githubDryRun() || !commandExists("gh") {
+		return b.String()
+	}
+
+	// appendSection writes opener+content+closer, clipping content so the
+	// builder holds at most roughly limit bytes afterwards. Empty content and
+	// an exhausted budget both skip the section entirely.
+	appendSection := func(opener, content, closer string, limit int) {
+		content = strings.TrimSpace(content)
+		room := limit - b.Len()
+		if content == "" || room <= 0 {
+			return
+		}
+		if len(content) > room {
+			// ToValidUTF8 drops a rune split in half by the byte cut.
+			content = strings.ToValidUTF8(content[:room], "") + "\n[truncated]"
+		}
+		b.WriteString(opener)
+		b.WriteString(content)
+		b.WriteString(closer)
+	}
+
+	num := fmt.Sprint(tr.number)
+	if tr.isPR {
+		// The diff may use at most three quarters of the budget so the
+		// comment thread still gets room after a very large change.
+		diff := ghOutput(ctx, "pr", "diff", num, "--repo", tr.repo)
+		appendSection("\n## Diff\n```diff\n", diff, "\n```\n", maxContextBytes*3/4)
+	}
+	comments := ghOutput(ctx, sub, "view", num, "--repo", tr.repo, "--json", "comments", "--jq", commentsJQ)
+	appendSection("\n## Comments\n", comments, "\n", maxContextBytes)
+	return b.String()
+}
+
+// buildPrompt constructs the instruction sent to the agent.
+func buildPrompt(tr *trigger, gathered string) string {
+	target := "issue"
+	if tr.isPR {
+		target = "pull request"
+	}
+	request := tr.request
+	if request == "" {
+		request = "(no explicit instruction — review the " + target + " and respond helpfully)"
+	}
+
+	var b strings.Builder
+	fmt.Fprintf(&b, "You are Kit, operating as an automated collaborator on the GitHub repository %s.\n\n", tr.repo)
+	fmt.Fprintf(&b, "@%s (access: %s) triggered you on %s #%d with this request:\n\n", tr.author, tr.association, target, tr.number)
+	fmt.Fprintf(&b, "%s\n\n", request)
+	fmt.Fprintf(&b, "## Context\n%s\n\n", strings.TrimSpace(gathered))
+	b.WriteString("Carry out the request. If you modify files, they will be committed to a new ")
+	b.WriteString("branch and a pull request will be opened automatically, so you do not need to ")
+	b.WriteString("commit or push yourself. Finish with a concise summary of what you did.")
+	return b.String()
+}
+
+// runAgent drives the agent headlessly by invoking this same binary in quiet,
+// ephemeral mode against the constructed prompt, and returns its stdout. In
+// dry-run it returns a canned response without spawning anything.
+//
+// The run is bounded by agentTimeout. On failure the returned error wraps the
+// exec error and, when the run was stopped by the deadline or a cancelled
+// parent context, names that cause explicitly.
+func runAgent(ctx context.Context, model, prompt string) (string, error) {
+	if githubDryRun() {
+		log.Info("github run: [dry-run] would run agent", "model", model, "promptChars", len(prompt))
+		return "[dry-run] agent response", nil
+	}
+
+	// Prefer the running binary; fall back to resolving "kit" on PATH.
+	exe, err := os.Executable()
+	if err != nil || exe == "" {
+		exe = "kit"
+	}
+
+	runCtx, cancel := context.WithTimeout(ctx, agentTimeout)
+	defer cancel()
+
+	args := []string{"--quiet", "--no-session", "--no-extensions"}
+	if model != "" {
+		args = append(args, "--model", model)
+	}
+	args = append(args, prompt)
+
+	cmd := exec.CommandContext(runCtx, exe, args...)
+	cmd.Stderr = os.Stderr // surface agent progress/errors in the Actions log
+	out, err := cmd.Output()
+	if err != nil {
+		// A child killed by the context reports only "signal: killed", which
+		// says nothing useful once posted back to the thread. Spell out the
+		// real cause while still wrapping the exec error.
+		switch ctxErr := runCtx.Err(); {
+		case ctxErr == context.DeadlineExceeded:
+			return "", fmt.Errorf("agent run failed: timed out after %s: %w", agentTimeout, err)
+		case ctxErr != nil:
+			return "", fmt.Errorf("agent run failed: %v: %w", ctxErr, err)
+		}
+		return "", fmt.Errorf("agent run failed: %w", err)
+	}
+	return string(out), nil
+}
+
+// hasUncommittedChanges reports whether `git status --porcelain` prints
+// anything, i.e. whether the working tree is dirty. In dry-run no git command
+// runs; the answer is taken from KIT_GITHUB_FAKE_DIRTY being non-empty. A
+// failed git call yields empty output and therefore reports a clean tree.
+func hasUncommittedChanges(ctx context.Context) bool {
+	if !githubDryRun() {
+		porcelain := gitOutput(ctx, "status", "--porcelain")
+		return strings.TrimSpace(porcelain) != ""
+	}
+	return os.Getenv("KIT_GITHUB_FAKE_DIRTY") != ""
+}
+
+// openPullRequest commits the working tree as kit-agent[bot] on a fresh
+// timestamped branch, pushes it, and opens a pull request against the default
+// branch. It returns the trimmed output of `gh pr create`, or "" in dry-run or
+// when that command fails. Failures of the individual git steps are only
+// logged by the helpers; they do not stop the sequence.
+func openPullRequest(ctx context.Context, tr *trigger, summary string) string {
+	headBranch := fmt.Sprintf("kit/issue-%d-%d", tr.number, time.Now().Unix())
+	commitMsg := fmt.Sprintf("kit: address #%d", tr.number)
+
+	runGit(ctx, "checkout", "-b", headBranch)
+	runGit(ctx, "add", "-A")
+	runGit(ctx,
+		"-c", "user.name="+botName,
+		"-c", "user.email="+botEmail,
+		"commit", "-m", commitMsg)
+
+	// The workflow checks out with `persist-credentials: false`, so no push
+	// credentials remain. gh's git credential helper restores them from
+	// GITHUB_TOKEN before pushing over the existing origin remote.
+	if !githubDryRun() {
+		runCmd(ctx, "gh", "auth", "setup-git")
+	}
+	runGit(ctx, "push", "origin", "HEAD:"+headBranch)
+
+	if githubDryRun() {
+		log.Info("github run: [dry-run] would open PR", "branch", headBranch, "base", tr.defaultBranch)
+		return ""
+	}
+
+	prTitle := fmt.Sprintf("kit: changes for #%d", tr.number)
+	prBody := fmt.Sprintf("Automated changes from Kit in response to #%d.\n\n%s", tr.number, summary)
+	created := ghOutput(ctx, "pr", "create",
+		"--repo", tr.repo,
+		"--head", headBranch,
+		"--base", tr.defaultBranch,
+		"--title", prTitle,
+		"--body", prBody)
+	return strings.TrimSpace(created)
+}
+
+// addReaction adds an emoji reaction to the trigger comment through the
+// reactions endpoint for the comment's kind ("issues" or "pulls").
+//
+// In dry-run the call is only logged. When gh is not installed the reaction is
+// skipped with a warning, so a missing CLI is not mistaken for dry-run in the
+// job log. Failures of the gh call itself are logged by runCmd.
+func addReaction(ctx context.Context, tr *trigger, content string) {
+	path := fmt.Sprintf("/repos/%s/%s/comments/%d/reactions", tr.repo, tr.commentKind, tr.commentID)
+	switch {
+	case githubDryRun():
+		log.Info("github run: [dry-run] react", "content", content, "path", path)
+	case !commandExists("gh"):
+		log.Warn("github run: gh CLI not found; skipping reaction", "content", content, "path", path)
+	default:
+		runCmd(ctx, "gh", "api", "-X", "POST", path, "-f", "content="+content)
+	}
+}
+
+// postComment posts a comment back on the triggering issue or pull request,
+// using `gh pr comment` for pull requests and `gh issue comment` otherwise.
+//
+// In dry-run the call is only logged. When gh is not installed the comment is
+// skipped with a warning, so a missing CLI is not mistaken for dry-run in the
+// job log. Failures of the gh call itself are logged by runCmd.
+func postComment(ctx context.Context, tr *trigger, body string) {
+	sub := "issue"
+	if tr.isPR {
+		sub = "pr"
+	}
+	switch {
+	case githubDryRun():
+		log.Info("github run: [dry-run] comment", "sub", sub, "number", tr.number, "chars", len(body))
+	case !commandExists("gh"):
+		log.Warn("github run: gh CLI not found; skipping comment", "sub", sub, "number", tr.number, "chars", len(body))
+	default:
+		runCmd(ctx, "gh", sub, "comment", fmt.Sprint(tr.number), "--repo", tr.repo, "--body", body)
+	}
+}
+
+// --- thin subprocess helpers -------------------------------------------------
+
+// commandExists reports whether name resolves to an executable via
+// exec.LookPath.
+func commandExists(name string) bool {
+	if _, lookErr := exec.LookPath(name); lookErr != nil {
+		return false
+	}
+	return true
+}
+
+// runGit executes a mutating git command through runCmd. In dry-run it only
+// logs the space-joined arguments and executes nothing.
+func runGit(ctx context.Context, args ...string) {
+	if !githubDryRun() {
+		runCmd(ctx, "git", args...)
+		return
+	}
+	log.Info("github run: [dry-run] git", "args", strings.Join(args, " "))
+}
+
+// gitOutput runs git with args under subprocessTimeout and returns its stdout.
+// On failure the error is logged and "" is returned.
+func gitOutput(ctx context.Context, args ...string) string {
+	bounded, stop := context.WithTimeout(ctx, subprocessTimeout)
+	defer stop()
+
+	stdout, runErr := exec.CommandContext(bounded, "git", args...).Output()
+	if runErr == nil {
+		return string(stdout)
+	}
+	log.Error("github run: git failed", "args", strings.Join(args, " "), "err", runErr)
+	return ""
+}
+
+// ghOutput runs gh with args under subprocessTimeout and returns its stdout.
+// On failure the error is logged and "" is returned.
+func ghOutput(ctx context.Context, args ...string) string {
+	bounded, stop := context.WithTimeout(ctx, subprocessTimeout)
+	defer stop()
+
+	stdout, runErr := exec.CommandContext(bounded, "gh", args...).Output()
+	if runErr == nil {
+		return string(stdout)
+	}
+	log.Error("github run: gh failed", "args", strings.Join(args, " "), "err", runErr)
+	return ""
+}
+
+// runCmd runs name with args under subprocessTimeout purely for its side
+// effects. A failure is logged together with the trimmed combined
+// stdout/stderr; nothing is returned to the caller.
+func runCmd(ctx context.Context, name string, args ...string) {
+	bounded, stop := context.WithTimeout(ctx, subprocessTimeout)
+	defer stop()
+
+	combined, runErr := exec.CommandContext(bounded, name, args...).CombinedOutput()
+	if runErr == nil {
+		return
+	}
+	log.Error("github run: command failed", "cmd", name, "err", runErr, "output", strings.TrimSpace(string(combined)))
+}
@@ -0,0 +1,190 @@
+package cmd
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// setupEvent stores payload as event.json in a per-test temp directory and
+// points GITHUB_EVENT_PATH at it, with GITHUB_ACTIONS=true and
+// KIT_GITHUB_DRY_RUN=1 so no real git/gh/agent work happens. On cleanup it
+// resets the run command's package-level flag variables so tests stay
+// independent.
+func setupEvent(t *testing.T, payload string) {
+	t.Helper()
+
+	eventFile := filepath.Join(t.TempDir(), "event.json")
+	writeErr := os.WriteFile(eventFile, []byte(payload), 0o644)
+	if writeErr != nil {
+		t.Fatalf("write event: %v", writeErr)
+	}
+
+	for _, kv := range [][2]string{
+		{"GITHUB_ACTIONS", "true"},
+		{"KIT_GITHUB_DRY_RUN", "1"},
+		{"GITHUB_EVENT_PATH", eventFile},
+	} {
+		t.Setenv(kv[0], kv[1])
+	}
+
+	t.Cleanup(func() {
+		githubRunModel = ""
+		githubRunDryRun = false
+	})
+}
+
+// issueCommentEvent is a minimal issue_comment payload: an OWNER comments
+// "/kit fix the broken parser" on plain issue #42 of acme/widgets. Tests derive
+// variants from it with strings.Replace.
+const issueCommentEvent = `{
+  "action": "created",
+  "comment": {
+    "id": 555,
+    "body": "/kit fix the broken parser",
+    "author_association": "OWNER",
+    "user": {"login": "alice"}
+  },
+  "issue": {"number": 42, "title": "Parser crashes on empty input", "body": "It panics."},
+  "repository": {"full_name": "acme/widgets", "default_branch": "main"}
+}`
+
+func TestExtractRequest(t *testing.T) {
+	tests := []struct {
+		name    string
+		body    string
+		want    string
+		wantHit bool
+	}{
+		{"start with request", "/kit fix the bug", "fix the bug", true},
+		{"bare token", "/kit", "", true},
+		{"trailing token", "hey /kit", "", true},
+		{"mid-sentence ignored", "please review /kit behavior in the docs", "", false},
+		{"no token", "just a normal comment", "", false},
+		{"token in second line", "thanks!\n/kit add tests", "add tests", true},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, hit := extractRequest(tt.body)
+			if hit != tt.wantHit || got != tt.want {
+				t.Errorf("extractRequest(%q) = (%q, %v), want (%q, %v)", tt.body, got, hit, tt.want, tt.wantHit)
+			}
+		})
+	}
+}
+
+// TestBuildTrigger_IssueComment verifies that a plain issue comment is
+// normalised into a non-PR trigger with the "issues" comment kind.
+func TestBuildTrigger_IssueComment(t *testing.T) {
+	setupEvent(t, issueCommentEvent)
+
+	event, loadErr := loadGitHubEvent()
+	if loadErr != nil {
+		t.Fatalf("loadGitHubEvent: %v", loadErr)
+	}
+	tr, buildErr := buildTrigger(event)
+	if buildErr != nil {
+		t.Fatalf("buildTrigger: %v", buildErr)
+	}
+
+	asExpected := tr.repo == "acme/widgets" &&
+		tr.number == 42 &&
+		!tr.isPR &&
+		tr.request == "fix the broken parser"
+	if !asExpected {
+		t.Errorf("unexpected trigger: %+v", tr)
+	}
+	if got := tr.commentKind; got != "issues" {
+		t.Errorf("commentKind = %q, want issues", got)
+	}
+}
+
+// TestBuildPrompt_ContainsContext verifies the rendered prompt carries the
+// request, repository, target, author, and the issue's title and body.
+func TestBuildPrompt_ContainsContext(t *testing.T) {
+	setupEvent(t, issueCommentEvent)
+
+	// Fail fast on setup errors: a nil event or trigger would otherwise
+	// surface as a nil-pointer panic that hides the real cause.
+	event, err := loadGitHubEvent()
+	if err != nil {
+		t.Fatalf("loadGitHubEvent: %v", err)
+	}
+	tr, err := buildTrigger(event)
+	if err != nil {
+		t.Fatalf("buildTrigger: %v", err)
+	}
+
+	prompt := buildPrompt(tr, gatherContext(context.Background(), tr))
+	for _, want := range []string{
+		"fix the broken parser",         // the request
+		"acme/widgets",                  // the repo
+		"issue #42",                     // the target
+		"@alice",                        // the author
+		"Parser crashes on empty input", // context: title
+		"It panics.",                    // context: body
+	} {
+		if !strings.Contains(prompt, want) {
+			t.Errorf("prompt missing %q\n---\n%s", want, prompt)
+		}
+	}
+}
+
+// TestRunGitHub_AuthorizedIssueComment runs the full handler in dry-run for an
+// OWNER comment and expects it to finish without error.
+func TestRunGitHub_AuthorizedIssueComment(t *testing.T) {
+	setupEvent(t, issueCommentEvent)
+
+	err := runGitHubRun(githubRunCmd, nil)
+	if err != nil {
+		t.Fatalf("runGitHubRun: %v", err)
+	}
+}
+
+// TestRunGitHub_UnauthorizedAssociation swaps the author association to NONE
+// and expects the handler to return nil as a no-op.
+func TestRunGitHub_UnauthorizedAssociation(t *testing.T) {
+	payload := strings.Replace(issueCommentEvent, `"OWNER"`, `"NONE"`, 1)
+	setupEvent(t, payload)
+
+	// Should return nil (no-op) without attempting the agent run.
+	err := runGitHubRun(githubRunCmd, nil)
+	if err != nil {
+		t.Fatalf("runGitHubRun should be a no-op for unauthorized authors, got: %v", err)
+	}
+}
+
+// TestRunGitHub_CommentWithoutToken replaces the comment body with text that
+// lacks the command token and expects the handler to return nil.
+func TestRunGitHub_CommentWithoutToken(t *testing.T) {
+	payload := strings.Replace(issueCommentEvent,
+		`"/kit fix the broken parser"`, `"just a normal comment"`, 1)
+	setupEvent(t, payload)
+
+	err := runGitHubRun(githubRunCmd, nil)
+	if err != nil {
+		t.Fatalf("runGitHubRun should be a no-op without /kit, got: %v", err)
+	}
+}
+
+// TestRunGitHub_MidSentenceMentionIgnored uses a body that mentions the token
+// mid-sentence and expects the handler to return nil.
+func TestRunGitHub_MidSentenceMentionIgnored(t *testing.T) {
+	payload := strings.Replace(issueCommentEvent,
+		`"/kit fix the broken parser"`, `"please review /kit behavior in the docs"`, 1)
+	setupEvent(t, payload)
+
+	err := runGitHubRun(githubRunCmd, nil)
+	if err != nil {
+		t.Fatalf("runGitHubRun should ignore mid-sentence mentions, got: %v", err)
+	}
+}
+
+// TestRunGitHub_PullRequestReviewComment feeds an event with a top-level
+// pull_request object, checks it becomes a PR trigger with the "pulls" comment
+// kind, and then runs the full handler in dry-run.
+func TestRunGitHub_PullRequestReviewComment(t *testing.T) {
+	setupEvent(t, `{
+  "action": "created",
+  "comment": {
+    "id": 999,
+    "body": "/kit review this change",
+    "author_association": "COLLABORATOR",
+    "user": {"login": "bob"}
+  },
+  "pull_request": {"number": 7, "title": "Add caching", "body": "Speeds things up."},
+  "repository": {"full_name": "acme/widgets", "default_branch": "main"}
+}`)
+	// Check the load error too: a nil event would otherwise panic inside
+	// buildTrigger and hide the real cause.
+	event, err := loadGitHubEvent()
+	if err != nil {
+		t.Fatalf("loadGitHubEvent: %v", err)
+	}
+	tr, err := buildTrigger(event)
+	if err != nil {
+		t.Fatalf("buildTrigger: %v", err)
+	}
+	if !tr.isPR || tr.number != 7 || tr.commentKind != "pulls" {
+		t.Errorf("unexpected PR trigger: %+v", tr)
+	}
+	if err := runGitHubRun(githubRunCmd, nil); err != nil {
+		t.Fatalf("runGitHubRun (PR): %v", err)
+	}
+}
+
+// TestRunGitHub_RequiresActionsOrDryRun clears both GITHUB_ACTIONS and every
+// dry-run switch and expects the handler to refuse with an error.
+func TestRunGitHub_RequiresActionsOrDryRun(t *testing.T) {
+	// Neither GITHUB_ACTIONS nor dry-run set → must error rather than act.
+	for _, name := range []string{"GITHUB_ACTIONS", "KIT_GITHUB_DRY_RUN"} {
+		t.Setenv(name, "")
+	}
+	githubRunDryRun = false
+	t.Cleanup(func() { githubRunDryRun = false })
+
+	err := runGitHubRun(githubRunCmd, nil)
+	if err == nil {
+		t.Fatal("expected an error when run outside Actions without --dry-run")
+	}
+}
+
+// TestResolveRunModel walks the precedence chain from lowest to highest:
+// built-in default, then $MODEL, then the --model flag variable.
+func TestResolveRunModel(t *testing.T) {
+	t.Cleanup(func() { githubRunModel = "" })
+
+	// Nothing configured: the default wins.
+	t.Setenv("MODEL", "")
+	githubRunModel = ""
+	got := resolveRunModel()
+	if got != defaultGitHubModel {
+		t.Errorf("default model = %q, want %q", got, defaultGitHubModel)
+	}
+
+	// The environment variable overrides the default.
+	t.Setenv("MODEL", "openai/gpt-5")
+	got = resolveRunModel()
+	if got != "openai/gpt-5" {
+		t.Errorf("MODEL env model = %q, want openai/gpt-5", got)
+	}
+
+	// The flag overrides the environment variable.
+	githubRunModel = "anthropic/claude-sonnet-4-5"
+	got = resolveRunModel()
+	if got != "anthropic/claude-sonnet-4-5" {
+		t.Errorf("flag model = %q, want anthropic/claude-sonnet-4-5", got)
+	}
+}
@@ -0,0 +1,102 @@
+package cmd
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestProviderSecretEnvVar(t *testing.T) {
+	tests := []struct {
+		provider string
+		want     string
+	}{
+		{"anthropic", "ANTHROPIC_API_KEY"},
+		{"openai", "OPENAI_API_KEY"},
+		// Unknown provider falls back to "<PROVIDER>_API_KEY" with sanitization.
+		{"my-custom.provider", "MY_CUSTOM_PROVIDER_API_KEY"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.provider, func(t *testing.T) {
+			got := providerSecretEnvVar(tt.provider)
+			if got != tt.want {
+				t.Errorf("providerSecretEnvVar(%q) = %q, want %q", tt.provider, got, tt.want)
+			}
+		})
+	}
+}
+
+// TestRenderGitHubWorkflow renders the workflow for an Anthropic model and
+// checks that the triggers, guards, action reference, model, secrets, and
+// permissions all appear in the output.
+func TestRenderGitHubWorkflow(t *testing.T) {
+	rendered := renderGitHubWorkflow("anthropic/claude-sonnet-4-5-20250929", "ANTHROPIC_API_KEY")
+
+	required := [...]string{
+		"name: kit",
+		"issue_comment:",
+		"pull_request_review_comment:",
+		"startsWith(github.event.comment.body, '/kit ')",
+		"github.event.comment.body == '/kit'",
+		"github.event.comment.author_association == 'OWNER'",
+		"github.event.comment.author_association == 'COLLABORATOR'",
+		"persist-credentials: false",
+		"uses: mark3labs/kit@v0",
+		"model: anthropic/claude-sonnet-4-5-20250929",
+		"GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}",
+		"ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}",
+		"contents: write",
+		"pull-requests: write",
+		"issues: write",
+	}
+	for i := range required {
+		if strings.Contains(rendered, required[i]) {
+			continue
+		}
+		t.Errorf("rendered workflow missing %q\n---\n%s", required[i], rendered)
+	}
+}
+
+// TestWriteGitHubWorkflow runs inside a temp directory and checks three
+// things: the first write creates the file, a second write without force is
+// refused, and a forced write replaces the content.
+func TestWriteGitHubWorkflow(t *testing.T) {
+	workDir := t.TempDir()
+	origDir, err := os.Getwd()
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() { _ = os.Chdir(origDir) })
+	if err := os.Chdir(workDir); err != nil {
+		t.Fatal(err)
+	}
+
+	// readWorkflow returns the scaffolded file's content or fails the test.
+	readWorkflow := func() string {
+		contents, readErr := os.ReadFile(githubWorkflowPath)
+		if readErr != nil {
+			t.Fatalf("reading workflow: %v", readErr)
+		}
+		return string(contents)
+	}
+
+	// First write succeeds and creates nested directories.
+	if err := writeGitHubWorkflow("anthropic/claude-sonnet-4-5", "ANTHROPIC_API_KEY", false); err != nil {
+		t.Fatalf("writeGitHubWorkflow: %v", err)
+	}
+	if first := readWorkflow(); !strings.Contains(first, "model: anthropic/claude-sonnet-4-5") {
+		t.Errorf("workflow missing model line:\n%s", first)
+	}
+
+	// Second write without force must refuse to clobber.
+	if err := writeGitHubWorkflow("anthropic/claude-sonnet-4-5", "ANTHROPIC_API_KEY", false); err == nil {
+		t.Error("expected error when overwriting without --force, got nil")
+	}
+
+	// With force it overwrites.
+	if err := writeGitHubWorkflow("openai/gpt-5", "OPENAI_API_KEY", true); err != nil {
+		t.Fatalf("writeGitHubWorkflow with force: %v", err)
+	}
+	if forced := readWorkflow(); !strings.Contains(forced, "OPENAI_API_KEY") {
+		t.Errorf("forced overwrite did not update content:\n%s", forced)
+	}
+
+	// Sanity: the file lives at the expected nested path.
+	if _, statErr := os.Stat(filepath.Join(workDir, githubWorkflowPath)); statErr != nil {
+		t.Errorf("workflow not at expected path: %v", statErr)
+	}
+}
@@ -0,0 +1,52 @@
+package cmd
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"strings"
+
+	"golang.org/x/term"
+
+	"github.com/mark3labs/kit/pkg/kit"
+)
+
+// skillTrustPrompt builds the callback that asks the user whether
+// project-local skills may be loaded (issue #65, gap #8). Those skills are
+// injected into the system prompt, so a freshly cloned untrusted repo could
+// smuggle instructions into the agent; the prompt runs before any project
+// skill is loaded.
+//
+// It returns nil — meaning "load without prompting" — when Kit is not running
+// interactively (--quiet, a one-shot positional prompt, or a non-TTY stdin),
+// so scripted and piped invocations keep their existing behaviour.
+func skillTrustPrompt() func(projectDir string, skillCount int) kit.TrustDecision {
+	// Only prompt for interactive terminal sessions.
+	interactive := !quietFlag && positionalPrompt == "" && term.IsTerminal(int(os.Stdin.Fd()))
+	if !interactive {
+		return nil
+	}
+
+	ask := func(projectDir string, skillCount int) kit.TrustDecision {
+		noun := "skill"
+		if skillCount != 1 {
+			noun = "skills"
+		}
+		fmt.Printf("\nThis project provides %d %s under .agents/skills or .kit/skills:\n  %s\n",
+			skillCount, noun, projectDir)
+		fmt.Print("Load them into the agent? [t]rust always / [o]nce / [s]kip (default skip): ")
+
+		// The read error is deliberately ignored: on failure the answer is
+		// empty or partial and falls through to the skip default below.
+		answer, _ := bufio.NewReader(os.Stdin).ReadString('\n')
+		answer = strings.ToLower(strings.TrimSpace(answer))
+
+		accepted := map[string]kit.TrustDecision{
+			"t":      kit.TrustProject,
+			"trust":  kit.TrustProject,
+			"a":      kit.TrustProject,
+			"always": kit.TrustProject,
+			"o":      kit.TrustProjectOnce,
+			"once":   kit.TrustProjectOnce,
+			"y":      kit.TrustProjectOnce,
+			"yes":    kit.TrustProjectOnce,
+		}
+		if decision, known := accepted[answer]; known {
+			return decision
+		}
+		return kit.SkipProjectSkills
+	}
+	return ask
+}
@@ -58,6 +58,7 @@ kit install github.com/mark3labs/kit/examples/extensions --local
 | `project-rules.go` | Project-specific rules | Session data, file reading |
 | `protected-paths.go` | Block dangerous operations | `OnToolCall` with blocking |
 | `permission-gate.go` | Confirm destructive actions | `OnToolCall` with confirmation |
+| `usage-budget.go` | Soft cost cap + per-turn report | `OnLLMUsage`, `SetState`/`GetState`, enriched `AgentEndEvent` |

 ### Tools & Commands

@@ -10,13 +10,21 @@ import (
 	"kit/ext"
 )

+// re matches !{...} with non-greedy content.
+var re = regexp.MustCompile(`!\{([^}]+)\}`)
+
 // Init expands inline bash expressions in user prompts before they reach the
-// LLM. Text like !{git branch --show-current} is replaced with the command's
-// stdout.
+// LLM. Text like !{git rev-parse --abbrev-ref HEAD} is replaced with the
+// command's stdout.
+//
+// In interactive mode the expansion happens at submit time via an editor
+// interceptor, so the expanded text is also visible in the user message
+// block on screen. In non-interactive mode (CLI, script, queue) the
+// expansion happens via OnInput transform.
 //
 // Examples:
 //
-//	"Fix the tests on !{git branch --show-current}"
+//	"Fix the tests on !{git rev-parse --abbrev-ref HEAD}"
 //	  → "Fix the tests on main"
 //
 //	"The current directory is !{pwd}"
@@ -24,29 +32,59 @@ import (
 //
 // Usage: kit -e examples/extensions/inline-bash.go
 func Init(api ext.API) {
-	// Matches !{...} with non-greedy content.
-	re := regexp.MustCompile(`!\{([^}]+)\}`)
+	// ── Interactive mode: editor interceptor ──────────────────────────
+	// Intercept Enter / Ctrl+D so we can expand !{...} BEFORE the
+	// SubmitMsg is created. This ensures the expanded text appears in
+	// the user message block on screen as well as in the LLM prompt.
+	api.OnSessionStart(func(_ ext.SessionStartEvent, ctx ext.Context) {
+		if !ctx.Interactive {
+			return
+		}
+		ctx.SetEditor(ext.EditorConfig{
+			HandleKey: func(key string, currentText string) ext.EditorKeyAction {
+				if (key == "enter" || key == "ctrl+d") && re.MatchString(currentText) {
+					expanded := expand(currentText)
+					// Clear the textarea asynchronously — calling
+					// SetEditorText synchronously from inside Update()
+					// would deadlock the BubbleTea event loop.
+					go ctx.SetEditorText("")
+					return ext.EditorKeyAction{
+						Type:       ext.EditorKeySubmit,
+						SubmitText: expanded,
+					}
+				}
+				return ext.EditorKeyAction{Type: ext.EditorKeyPassthrough}
+			},
+		})
+	})

+	// ── Non-interactive fallback: OnInput transform ──────────────────
+	// For CLI, script, and queue sources the editor interceptor is not
+	// active, so we fall back to OnInput which still rewrites the
+	// prompt text sent to the LLM.
 	api.OnInput(func(ev ext.InputEvent, ctx ext.Context) *ext.InputResult {
-		if !re.MatchString(ev.Text) {
+		if ev.Source == "interactive" || !re.MatchString(ev.Text) {
 			return nil
 		}

-		expanded := re.ReplaceAllStringFunc(ev.Text, func(match string) string {
-			// Extract the command between !{ and }.
-			cmd := re.FindStringSubmatch(match)[1]
-			cmd = strings.TrimSpace(cmd)
-
-			out, err := exec.Command("bash", "-c", cmd).Output()
-			if err != nil {
-				return match // keep original on error
-			}
-			return strings.TrimSpace(string(out))
-		})
-
 		return &ext.InputResult{
 			Action: "transform",
-			Text:   expanded,
+			Text:   expand(ev.Text),
 		}
 	})
 }
+
+// expand substitutes each !{cmd} token in text with the trimmed stdout of
+// running cmd through `bash -c`. A token whose command fails is left in the
+// text unchanged.
+func expand(text string) string {
+	runToken := func(token string) string {
+		// Capture group 1 is the command between "!{" and "}".
+		script := strings.TrimSpace(re.FindStringSubmatch(token)[1])
+
+		stdout, runErr := exec.Command("bash", "-c", script).Output()
+		if runErr != nil {
+			return token // keep original on error
+		}
+		return strings.TrimSpace(string(stdout))
+	}
+	return re.ReplaceAllStringFunc(text, runToken)
+}
@@ -0,0 +1,110 @@
+//go:build ignore
+
+// phase-handoff.go demonstrates ctx.NewSession by automating the multi-phase
+// workflow pattern: the agent works through a spec, writes a HANDOFF.md at
+// the end of each phase, then a fresh session picks up where the last one
+// left off.
+//
+// Two trigger modes are provided:
+//
+//  1. Automatic — when an agent turn ends and the last assistant message
+//     contains the sentinel "<HANDOFF_READY>", the extension starts a new
+//     session whose first prompt references @HANDOFF.md. Use this when you
+//     want the agent to hand off control to itself with no user intervention.
+//
+//  2. Manual — the /handoff slash command starts a new session immediately
+//     with the same handoff prompt. Useful when you finish a phase by hand
+//     and want to clear the context window before the next one starts.
+//
+// Usage:
+//
+//	kit -e examples/extensions/phase-handoff.go
+//
+// Have your spec-driving agent write a HANDOFF.md at the end of each phase
+// and finish its message with the literal string `<HANDOFF_READY>`. The
+// next session boots automatically and reads HANDOFF.md as @file context.
+
+package main
+
+import (
+	"strings"
+
+	"kit/ext"
+)
+
+// HANDOFFSentinel is the marker the agent appends to its last message to
+// request an automatic session switch. Change this to whatever fits your
+// workflow.
+const HANDOFFSentinel = "<HANDOFF_READY>"
+
+// HANDOFFPrompt is the first prompt the new session receives. The
+// "@HANDOFF.md" reference in it triggers Kit's @file expansion, inlining the
+// handoff file's contents as XML-wrapped context.
+const HANDOFFPrompt = "Read @HANDOFF.md and continue with the next phase."
+
+// Init wires up the three pieces of the phase-handoff workflow: an OnAgentEnd
+// hook that starts a new session when the last assistant message contains
+// HANDOFFSentinel, a /handoff command that does the same on demand, and an
+// opt-in confirmation prompt that runs before a session switch.
+func Init(api ext.API) {
+	// Automatic trigger: at the end of an agent turn, look for the sentinel
+	// in the most recent message.
+	api.OnAgentEnd(func(e ext.AgentEndEvent, ctx ext.Context) {
+		msgs := ctx.GetMessages()
+		if len(msgs) == 0 {
+			return
+		}
+		// Only an assistant message that contains the sentinel (anywhere in
+		// its content) triggers the handoff.
+		last := msgs[len(msgs)-1]
+		if last.Role != "assistant" || !strings.Contains(last.Content, HANDOFFSentinel) {
+			return
+		}
+
+		// NewSession blocks while the agent finishes settling and then while
+		// the TUI completes the switch; run it in a goroutine so the agent's
+		// turn-end pipeline isn't stalled. The internal wait-for-idle (added
+		// in response to issue #63) makes this reliable even when post-turn
+		// tooling (formatters, on-save hooks, hidden tool calls) extends the
+		// busy window past AgentEnd.
+		go func() {
+			if err := ctx.NewSession(HANDOFFPrompt); err != nil {
+				ctx.PrintError("phase-handoff: " + err.Error())
+				return
+			}
+			ctx.PrintInfo("phase-handoff: started a fresh session from HANDOFF.md")
+		}()
+	})
+
+	// Manual trigger: /handoff [optional override prompt]
+	api.RegisterCommand(ext.CommandDef{
+		Name:        "handoff",
+		Description: "Start a new session, optionally with a custom prompt",
+		Execute: func(args string, ctx ext.Context) (string, error) {
+			// Blank arguments fall back to the default handoff prompt.
+			prompt := strings.TrimSpace(args)
+			if prompt == "" {
+				prompt = HANDOFFPrompt
+			}
+			if err := ctx.NewSession(prompt); err != nil {
+				return "", err
+			}
+			return "", nil
+		},
+	})
+
+	// Optional safeguard: surface the next prompt so the user can confirm
+	// before the auto-handoff proceeds. Set kit option "handoff.confirm=1"
+	// to enable.
+	// NOTE(review): nothing here restricts the check to handoffs started by
+	// this extension — presumably it runs for every session switch that
+	// carries an initial prompt; confirm against the ext API.
+	api.OnBeforeSessionSwitch(func(e ext.BeforeSessionSwitchEvent, ctx ext.Context) *ext.BeforeSessionSwitchResult {
+		if ctx.GetOption("handoff.confirm") != "1" {
+			return nil
+		}
+		// Switches without an initial prompt are never questioned.
+		if e.InitialPrompt == "" {
+			return nil
+		}
+		resp := ctx.PromptConfirm(ext.PromptConfirmConfig{
+			Message:      "Start a new session with prompt:\n  " + e.InitialPrompt + "\n\nProceed?",
+			DefaultValue: true,
+		})
+		// Dismissing the dialog is treated the same as answering "no".
+		if resp.Cancelled || !resp.Value {
+			return &ext.BeforeSessionSwitchResult{
+				Cancel: true,
+				Reason: "handoff cancelled by user",
+			}
+		}
+		return nil
+	})
+}
@@ -13,7 +13,7 @@ import (
 // without panicking and properly guards nil ctx calls.
 func TestSubagentMonitor_SessionStart(t *testing.T) {
 	harness := test.New(t)
-	harness.LoadFile("../../.kit/extensions/subagent-monitor.go")
+	harness.LoadFile("./subagent-monitor.go")

 	// Emit SessionStart - should not panic even with nil ctx functions
 	_, err := harness.Emit(extensions.SessionStartEvent{SessionID: "test-session"})
@@ -26,7 +26,7 @@ func TestSubagentMonitor_SessionStart(t *testing.T) {
 // creates entries and emits widget updates.
 func TestSubagentMonitor_SubagentLifecycle(t *testing.T) {
 	harness := test.New(t)
-	harness.LoadFile("../../.kit/extensions/subagent-monitor.go")
+	harness.LoadFile("./subagent-monitor.go")

 	// Start session
 	_, err := harness.Emit(extensions.SessionStartEvent{SessionID: "test-session"})
@@ -84,7 +84,7 @@ func TestSubagentMonitor_SubagentLifecycle(t *testing.T) {
 // TestSubagentMonitor_MultipleSubagents verifies multiple parallel subagents.
 func TestSubagentMonitor_MultipleSubagents(t *testing.T) {
 	harness := test.New(t)
-	harness.LoadFile("../../.kit/extensions/subagent-monitor.go")
+	harness.LoadFile("./subagent-monitor.go")

 	_, err := harness.Emit(extensions.SessionStartEvent{SessionID: "test-session"})
 	if err != nil {
@@ -130,11 +130,63 @@ func TestSubagentMonitor_MultipleSubagents(t *testing.T) {
 	time.Sleep(100 * time.Millisecond)
 }

+// TestSubagentMonitor_ConcurrentSubagents verifies no panics when multiple
+// subagents emit events concurrently from different goroutines. Each of the
+// five goroutines replays one full subagent lifecycle: a start event, twenty
+// text chunks, and an end event.
+func TestSubagentMonitor_ConcurrentSubagents(t *testing.T) {
+	harness := test.New(t)
+	harness.LoadFile("./subagent-monitor.go")
+
+	_, err := harness.Emit(extensions.SessionStartEvent{SessionID: "test-session"})
+	if err != nil {
+		t.Fatalf("SessionStart should not error: %v", err)
+	}
+
+	// Start 5 subagents concurrently. The channel is buffered to the number
+	// of goroutines, so the deferred completion signal never blocks.
+	done := make(chan struct{}, 5)
+	for i := range 5 {
+		go func(idx int) {
+			defer func() { done <- struct{}{} }()
+
+			callID := fmt.Sprintf("concurrent-%d", idx)
+			task := fmt.Sprintf("concurrent task %d", idx)
+
+			// Emit results are discarded throughout: this test only checks
+			// that concurrent emission does not panic.
+			_, _ = harness.Emit(extensions.SubagentStartEvent{
+				ToolCallID: callID,
+				Task:       task,
+			})
+
+			// Emit many chunks rapidly
+			for j := range 20 {
+				_, _ = harness.Emit(extensions.SubagentChunkEvent{
+					ToolCallID: callID,
+					Task:       task,
+					ChunkType:  "text",
+					Content:    fmt.Sprintf("agent %d chunk %d", idx, j),
+				})
+			}
+
+			_, _ = harness.Emit(extensions.SubagentEndEvent{
+				ToolCallID: callID,
+				Task:       task,
+				Response:   "done",
+			})
+		}(i)
+	}
+
+	// Wait for all goroutines (one receive per goroutine started above)
+	for range 5 {
+		<-done
+	}
+
+	// Allow any final processing
+	time.Sleep(200 * time.Millisecond)
+}
+
 // TestSubagentMonitor_SessionShutdown verifies shutdown doesn't panic
 // even with nil ctx functions.
 func TestSubagentMonitor_SessionShutdown(t *testing.T) {
 	harness := test.New(t)
-	harness.LoadFile("../../.kit/extensions/subagent-monitor.go")
+	harness.LoadFile("./subagent-monitor.go")

 	// Start then shutdown
 	_, err := harness.Emit(extensions.SessionStartEvent{SessionID: "test-session"})
@@ -0,0 +1,153 @@
+//go:build ignore
+
+// sudo-handler.go - Extension to handle sudo password prompts securely
+//
+// This extension intercepts bash tool calls whose command contains "sudo" and:
+// 1. Checks whether a password is already cached in memory for this session
+// 2. If not cached, prompts the user for their password via ctx.PromptInput
+// 3. Exports it as the SUDO_PASSWORD environment variable of the current process
+// 4. The bash tool automatically uses sudo -S -p '' to pipe the password
+//
+// Usage: kit -e examples/extensions/sudo-handler.go
+//
+// Security notes:
+// - The password is held in process memory and in the process environment
+// - The extension itself never prints or logs the password
+// - The cache is per session; it is cleared on session shutdown
+// - SUDO_PASSWORD stays set from the first sudo call until session shutdown
+
+package main
+
+import (
+	"encoding/json"
+	"os"
+	"strings"
+	"sync"
+
+	"kit/ext"
+)
+
+var (
+	// cachedPassword stores the sudo password for the session
+	cachedPassword string
+	// hasCachedPassword tracks if we have a valid cached password
+	hasCachedPassword bool
+	// mu protects cached password access
+	mu sync.RWMutex
+)
+
+// Init sets up the sudo handler extension. It registers an OnToolCall hook
+// that makes a sudo password available (cached or freshly prompted) before a
+// bash command containing sudo runs, and an OnSessionShutdown hook that
+// clears the cached password and the SUDO_PASSWORD environment variable.
+func Init(api ext.API) {
+	api.OnToolCall(func(tc ext.ToolCallEvent, ctx ext.Context) *ext.ToolCallResult {
+		// Ignore every tool except bash.
+		if tc.ToolName != "bash" {
+			return nil
+		}
+
+		// Parse the command from tool input. Input that does not decode is
+		// ignored rather than blocked.
+		var input struct {
+			Command string `json:"command"`
+		}
+		if err := json.Unmarshal([]byte(tc.Input), &input); err != nil {
+			return nil
+		}
+
+		// Check if command contains sudo
+		if !containsSudo(input.Command) {
+			return nil
+		}
+
+		// Check if we already have cached credentials. Both values are
+		// copied out under the read lock so they are consistent with each
+		// other.
+		mu.RLock()
+		password := cachedPassword
+		hasCached := hasCachedPassword
+		mu.RUnlock()
+
+		if hasCached {
+			// Use cached password
+			os.Setenv("SUDO_PASSWORD", password)
+			return nil
+		}
+
+		// No cached password - prompt user. The prompt shows at most 60
+		// characters of the command.
+		result := ctx.PromptInput(ext.PromptInputConfig{
+			Message:     "🔐 Sudo password required for:\n  " + truncateCommand(input.Command, 60),
+			Placeholder: "Enter your password",
+		})
+
+		// A cancelled prompt or an empty answer blocks the tool call.
+		if result.Cancelled {
+			return &ext.ToolCallResult{
+				Block:  true,
+				Reason: "Sudo password prompt cancelled by user",
+			}
+		}
+
+		if result.Value == "" {
+			return &ext.ToolCallResult{
+				Block:  true,
+				Reason: "No password provided",
+			}
+		}
+
+		// Cache the password for this session. The value is stored as typed;
+		// nothing here checks that sudo actually accepts it.
+		mu.Lock()
+		cachedPassword = result.Value
+		hasCachedPassword = true
+		mu.Unlock()
+
+		// Set environment variable for the bash tool to use.
+		// NOTE(review): the variable is not removed after the tool call; it
+		// stays set until the OnSessionShutdown hook below runs.
+		os.Setenv("SUDO_PASSWORD", result.Value)
+
+		// Show confirmation (without revealing password)
+		ctx.PrintInfo("Sudo password cached for this session")
+
+		return nil
+	})
+
+	// Clear cached password when session ends
+	api.OnSessionShutdown(func(event ext.SessionShutdownEvent, ctx ext.Context) {
+		mu.Lock()
+		cachedPassword = ""
+		hasCachedPassword = false
+		mu.Unlock()
+		os.Unsetenv("SUDO_PASSWORD")
+	})
+}
+
+// containsSudo reports whether command appears to invoke sudo.
+//
+// The word "sudo" is matched case-insensitively. It counts only when it is
+// followed by a space or tab (so there is something for sudo to run) and is
+// not the tail of a longer word: "pseudo code" and "visudo -c" do not match,
+// while "sudo ls", "a && sudo b" and "/usr/bin/sudo ls" do.
+//
+// Quoting is not parsed, so sudo mentioned inside a string literal still
+// counts; the cost of such a false positive is at most one password prompt.
+func containsSudo(command string) bool {
+	lower := strings.ToLower(command)
+	for from := 0; ; {
+		i := strings.Index(lower[from:], "sudo")
+		if i < 0 {
+			return false
+		}
+		start, end := from+i, from+i+len("sudo")
+		followed := end < len(lower) && (lower[end] == ' ' || lower[end] == '\t')
+		if followed && (start == 0 || !isWordByte(lower[start-1])) {
+			return true
+		}
+		from = start + 1 // keep scanning after this occurrence
+	}
+}
+
+// isWordByte reports whether b is a lower-case ASCII letter, a digit, or an
+// underscore. Callers pass bytes of an already lower-cased string, so upper
+// case does not need to be considered.
+func isWordByte(b byte) bool {
+	return b == '_' || (b >= '0' && b <= '9') || (b >= 'a' && b <= 'z')
+}
+
+// truncateCommand shortens cmd to at most maxLen runes for display, ending a
+// shortened result with "..." when there is room for it.
+//
+// Length is measured in runes, not bytes, so a multi-byte character is never
+// cut in half. When maxLen is below 3 the ellipsis cannot fit; the command is
+// then simply cut to maxLen runes (negative values are treated as 0) instead
+// of panicking on a negative slice bound.
+func truncateCommand(cmd string, maxLen int) string {
+	runes := []rune(cmd)
+	if len(runes) <= maxLen {
+		return cmd
+	}
+	if maxLen < 3 {
+		if maxLen < 0 {
+			maxLen = 0
+		}
+		return string(runes[:maxLen])
+	}
+	return string(runes[:maxLen-3]) + "..."
+}
@@ -0,0 +1,87 @@
+//go:build ignore
+
+package main
+
+import (
+	"fmt"
+	"strconv"
+
+	"kit/ext"
+)
+
+// Init demonstrates the three primitives added in issue #53:
+//
+//  1. api.OnLLMUsage(...) — per-LLM-call usage callback with token + cost
+//     deltas. Use this for budget enforcement that reacts between calls
+//     within a single agent turn, rather than only at turn boundaries.
+//
+//  2. ctx.SetState / ctx.GetState / ctx.DeleteState / ctx.ListState —
+//     last-write-wins, session-scoped key-value store backed by a sidecar
+//     file. Use this for snapshot state (current value of X) instead of
+//     ctx.AppendEntry, which is append-only and bloats branch reads.
+//
+//  3. ext.AgentEndEvent.ToolCallCount / .ToolNames / .LLMCallCount /
+//     .InputTokensDelta / .OutputTokensDelta / .CostDelta / .DurationMs —
+//     per-turn aggregates so observer extensions don't need to maintain
+//     parallel bookkeeping.
+//
+// Together these support a simple soft-budget cap: warn when the
+// cumulative cost in this session exceeds a threshold, and print a
+// per-turn report on AgentEnd.
+//
+// Usage: kit -e examples/extensions/usage-budget.go
+func Init(api ext.API) {
+	// State key holding the soft cap in USD, stored as a decimal string.
+	const warnAtKey = "usage-budget:warn-at-usd"
+
+	// 1. Print per-LLM-call usage with provider, model, and cost.
+	api.OnLLMUsage(func(e ext.LLMUsageEvent, ctx ext.Context) {
+		ctx.Print(fmt.Sprintf(
+			"[usage] step=%d %s/%s tokens=↑%d ↓%d cache=↑%d/↓%d cost=$%.4f (%s)",
+			e.StepNumber, e.Provider, e.Model,
+			e.InputTokens, e.OutputTokens,
+			e.CacheWriteTokens, e.CacheReadTokens,
+			e.Cost, e.FinishReason,
+		))
+
+		// 2. Persist running total in last-write-wins state.
+		current := 0.0
+		if raw, ok := ctx.GetState("usage-budget:total-cost"); ok {
+			// The parse error is discarded; current takes ParseFloat's
+			// returned value as-is.
+			current, _ = strconv.ParseFloat(raw, 64)
+		}
+		current += e.Cost
+		ctx.SetState("usage-budget:total-cost", strconv.FormatFloat(current, 'f', 6, 64))
+
+		// Soft warn-at threshold (configurable via state). A missing or
+		// unparseable value leaves the 0.50 default in place.
+		warnAt := 0.50
+		if raw, ok := ctx.GetState(warnAtKey); ok {
+			if v, err := strconv.ParseFloat(raw, 64); err == nil {
+				warnAt = v
+			}
+		}
+		// Checked on every LLM call, so the warning repeats on each call
+		// once the running total is above the cap.
+		if current > warnAt {
+			ctx.PrintError(fmt.Sprintf(
+				"[usage] session cost $%.4f exceeds soft cap $%.2f",
+				current, warnAt,
+			))
+		}
+	})
+
+	// 3. Print a per-turn summary using the enriched AgentEndEvent.
+	api.OnAgentEnd(func(e ext.AgentEndEvent, ctx ext.Context) {
+		ctx.Print(fmt.Sprintf(
+			"[turn] stop=%s tools=%d llm-calls=%d tokens=↑%d ↓%d cost=$%.4f duration=%dms",
+			e.StopReason, e.ToolCallCount, e.LLMCallCount,
+			e.InputTokensDelta, e.OutputTokensDelta, e.CostDelta, e.DurationMs,
+		))
+		// The tool list is printed only when the turn used at least one tool.
+		if len(e.ToolNames) > 0 {
+			ctx.Print(fmt.Sprintf("[turn] tool order: %v", e.ToolNames))
+		}
+	})
+
+	// Bootstrap default soft cap once per session. A value already present
+	// in state is left untouched.
+	api.OnSessionStart(func(e ext.SessionStartEvent, ctx ext.Context) {
+		if _, ok := ctx.GetState(warnAtKey); !ok {
+			ctx.SetState(warnAtKey, "0.50")
+		}
+	})
+}
@@ -42,4 +42,14 @@ defer host.Close()
 response, err := host.Prompt(ctx, "Hello!")
 ```

+Or use the functional-options constructor for quick setups (streaming defaults on):
+
+```go
+host, err := kit.NewAgent(ctx,
+    kit.WithModel("anthropic/claude-sonnet-4-5-20250929"),
+    kit.WithSystemPrompt("You are a helpful assistant."),
+    kit.Ephemeral(),
+)
+```
+
 See the [SDK README](../../pkg/kit/README.md) for the full API reference.
@@ -62,7 +62,7 @@ func main() {
 		}
 	})
 	// Subscribe to streaming chunks.
-	host3.OnStreaming(func(e kit.MessageUpdateEvent) {
+	host3.OnMessageUpdate(func(e kit.MessageUpdateEvent) {
 		fmt.Print(e.Chunk)
 	})

@@ -1,101 +1,103 @@
 module github.com/mark3labs/kit

-go 1.26.1
+go 1.26.4

 require (
 	charm.land/bubbles/v2 v2.1.0
-	charm.land/bubbletea/v2 v2.0.2
-	charm.land/fantasy v0.17.1
+	charm.land/bubbletea/v2 v2.0.7
+	charm.land/fantasy v0.32.0
 	charm.land/huh/v2 v2.0.3
-	charm.land/lipgloss/v2 v2.0.2
-	github.com/alecthomas/chroma/v2 v2.23.1
+	charm.land/lipgloss/v2 v2.0.4
+	github.com/alecthomas/chroma/v2 v2.27.0
 	github.com/atotto/clipboard v0.1.4
 	github.com/aymanbagabas/go-udiff v0.4.1
+	github.com/charmbracelet/colorprofile v0.4.3
 	github.com/charmbracelet/fang v1.0.0
 	github.com/charmbracelet/log v1.0.0
-	github.com/charmbracelet/openai-go v0.0.0-20260319145158-d0740cc34266
-	github.com/charmbracelet/ultraviolet v0.0.0-20260330092749-0f94982c930b
+	github.com/charmbracelet/openai-go v0.0.0-20260617131321-5e4b9c18c4be
+	github.com/charmbracelet/ultraviolet v0.0.0-20260615092913-2399af76d5b1
+	github.com/charmbracelet/x/editor v0.2.0
 	github.com/clipperhouse/displaywidth v0.11.0
 	github.com/clipperhouse/uax29/v2 v2.7.0
-	github.com/coder/acp-go-sdk v0.6.3
-	github.com/fsnotify/fsnotify v1.9.0
+	github.com/coder/acp-go-sdk v0.13.5
+	github.com/fsnotify/fsnotify v1.10.1
 	github.com/indaco/herald v0.13.0
 	github.com/indaco/herald-md v0.3.0
-	github.com/mark3labs/mcp-go v0.47.0
+	github.com/mark3labs/mcp-go v0.55.0
 	github.com/spf13/cobra v1.10.2
 	github.com/spf13/viper v1.21.0
 	github.com/traefik/yaegi v0.16.1
-	golang.org/x/term v0.41.0
+	golang.org/x/image v0.42.0
+	golang.org/x/term v0.44.0
 	gopkg.in/yaml.v3 v3.0.1
 )

 require (
 	cloud.google.com/go v0.123.0 // indirect
-	cloud.google.com/go/auth v0.19.0 // indirect
+	cloud.google.com/go/auth v0.20.0 // indirect
 	cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
 	cloud.google.com/go/compute/metadata v0.9.0 // indirect
-	github.com/Azure/azure-sdk-for-go/sdk/azcore v1.21.0 // indirect
+	github.com/Azure/azure-sdk-for-go/sdk/azcore v1.22.0 // indirect
 	github.com/Azure/azure-sdk-for-go/sdk/internal v1.12.0 // indirect
-	github.com/aws/aws-sdk-go-v2 v1.41.5 // indirect
-	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8 // indirect
-	github.com/aws/aws-sdk-go-v2/config v1.32.14 // indirect
-	github.com/aws/aws-sdk-go-v2/credentials v1.19.14 // indirect
-	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21 // indirect
-	github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21 // indirect
-	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21 // indirect
-	github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6 // indirect
-	github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 // indirect
-	github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21 // indirect
-	github.com/aws/aws-sdk-go-v2/service/signin v1.0.9 // indirect
-	github.com/aws/aws-sdk-go-v2/service/sso v1.30.15 // indirect
-	github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19 // indirect
-	github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 // indirect
-	github.com/aws/smithy-go v1.24.3 // indirect
+	github.com/aws/aws-sdk-go-v2 v1.42.0 // indirect
+	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.13 // indirect
+	github.com/aws/aws-sdk-go-v2/config v1.32.25 // indirect
+	github.com/aws/aws-sdk-go-v2/credentials v1.19.24 // indirect
+	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.29 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.29 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.29 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.30 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.12 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.29 // indirect
+	github.com/aws/aws-sdk-go-v2/service/signin v1.2.0 // indirect
+	github.com/aws/aws-sdk-go-v2/service/sso v1.31.3 // indirect
+	github.com/aws/aws-sdk-go-v2/service/ssooidc v1.36.6 // indirect
+	github.com/aws/aws-sdk-go-v2/service/sts v1.43.3 // indirect
+	github.com/aws/smithy-go v1.27.2 // indirect
 	github.com/catppuccin/go v0.3.0 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/charmbracelet/anthropic-sdk-go v0.0.0-20260223140439-63879b0b8dab // indirect
-	github.com/charmbracelet/colorprofile v0.4.3 // indirect
 	github.com/charmbracelet/harmonica v0.2.0 // indirect
 	github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834 // indirect
 	github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
-	github.com/charmbracelet/x/exp/charmtone v0.0.0-20260330094520-2dce04b6f8a4 // indirect
+	github.com/charmbracelet/x/exp/charmtone v0.0.0-20260615092313-b57e5e6d29bb // indirect
 	github.com/charmbracelet/x/exp/ordered v0.1.0 // indirect
-	github.com/charmbracelet/x/exp/slice v0.0.0-20260330094520-2dce04b6f8a4 // indirect
+	github.com/charmbracelet/x/exp/slice v0.0.0-20260615092313-b57e5e6d29bb // indirect
 	github.com/charmbracelet/x/exp/strings v0.1.0 // indirect
 	github.com/charmbracelet/x/json v0.2.0 // indirect
 	github.com/charmbracelet/x/termios v0.1.1 // indirect
 	github.com/charmbracelet/x/windows v0.2.2 // indirect
-	github.com/dlclark/regexp2 v1.11.5 // indirect
+	github.com/dlclark/regexp2 v1.12.0 // indirect
+	github.com/dlclark/regexp2/v2 v2.2.2 // indirect
 	github.com/dustin/go-humanize v1.0.1 // indirect
-	github.com/felixge/httpsnoop v1.0.4 // indirect
-	github.com/go-json-experiment/json v0.0.0-20260214004413-d219187c3433 // indirect
+	github.com/felixge/httpsnoop v1.1.0 // indirect
+	github.com/go-json-experiment/json v0.0.0-20260601182631-00ed12fed2a6 // indirect
 	github.com/go-logfmt/logfmt v0.6.1 // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-viper/mapstructure/v2 v2.5.0 // indirect
 	github.com/goccy/go-yaml v1.19.2 // indirect
 	github.com/google/go-cmp v0.7.0 // indirect
-	github.com/google/jsonschema-go v0.4.2 // indirect
+	github.com/google/jsonschema-go v0.4.3 // indirect
 	github.com/google/s2a-go v0.1.9 // indirect
 	github.com/google/uuid v1.6.0 // indirect
-	github.com/googleapis/enterprise-certificate-proxy v0.3.14 // indirect
-	github.com/googleapis/gax-go/v2 v2.21.0 // indirect
+	github.com/googleapis/enterprise-certificate-proxy v0.3.16 // indirect
+	github.com/googleapis/gax-go/v2 v2.22.0 // indirect
 	github.com/gorilla/websocket v1.5.3 // indirect
-	github.com/kaptinlin/go-i18n v0.3.0 // indirect
-	github.com/kaptinlin/jsonpointer v0.4.17 // indirect
-	github.com/kaptinlin/jsonschema v0.7.7 // indirect
-	github.com/kaptinlin/messageformat-go v0.4.19 // indirect
+	github.com/kaptinlin/jsonpointer v0.4.26 // indirect
+	github.com/kaptinlin/jsonschema v0.8.1 // indirect
 	github.com/mitchellh/hashstructure/v2 v2.0.2 // indirect
 	github.com/muesli/mango v0.2.0 // indirect
 	github.com/muesli/mango-cobra v1.3.0 // indirect
 	github.com/muesli/mango-pflag v0.2.0 // indirect
 	github.com/muesli/roff v0.1.0 // indirect
-	github.com/pelletier/go-toml/v2 v2.3.0 // indirect
+	github.com/pelletier/go-toml/v2 v2.4.0 // indirect
 	github.com/sagikazarmark/locafero v0.12.0 // indirect
+	github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect
 	github.com/spf13/afero v1.15.0 // indirect
 	github.com/spf13/cast v1.10.0 // indirect
 	github.com/subosito/gotenv v1.6.0 // indirect
-	github.com/tidwall/gjson v1.18.0 // indirect
+	github.com/tidwall/gjson v1.19.0 // indirect
 	github.com/tidwall/match v1.2.0 // indirect
 	github.com/tidwall/pretty v1.2.1 // indirect
 	github.com/tidwall/sjson v1.2.5 // indirect
@@ -103,38 +105,38 @@ require (
 	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
 	github.com/yuin/goldmark v1.8.2 // indirect
 	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
-	go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0 // indirect
-	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 // indirect
-	go.opentelemetry.io/otel v1.43.0 // indirect
-	go.opentelemetry.io/otel/metric v1.43.0 // indirect
-	go.opentelemetry.io/otel/trace v1.43.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.69.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.69.0 // indirect
+	go.opentelemetry.io/otel v1.44.0 // indirect
+	go.opentelemetry.io/otel/metric v1.44.0 // indirect
+	go.opentelemetry.io/otel/trace v1.44.0 // indirect
 	go.yaml.in/yaml/v3 v3.0.4 // indirect
-	golang.org/x/crypto v0.49.0 // indirect
-	golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 // indirect
-	golang.org/x/net v0.52.0 // indirect
+	golang.org/x/crypto v0.53.0 // indirect
+	golang.org/x/exp v0.0.0-20260611194520-c48552f49976 // indirect
+	golang.org/x/net v0.56.0 // indirect
 	golang.org/x/oauth2 v0.36.0 // indirect
 	golang.org/x/time v0.15.0 // indirect
-	google.golang.org/api v0.274.0 // indirect
-	google.golang.org/genai v1.52.1 // indirect
-	google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect
-	google.golang.org/grpc v1.80.0 // indirect
+	google.golang.org/api v0.285.0 // indirect
+	google.golang.org/genai v1.61.0 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20260615183401-62b3387ff324 // indirect
+	google.golang.org/grpc v1.81.1 // indirect
 	google.golang.org/protobuf v1.36.11 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 )

 require (
 	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
-	github.com/charmbracelet/x/ansi v0.11.6
+	github.com/charmbracelet/x/ansi v0.11.7
 	github.com/charmbracelet/x/term v0.2.2 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/lucasb-eyer/go-colorful v1.4.0 // indirect
-	github.com/mattn/go-isatty v0.0.20 // indirect
-	github.com/mattn/go-runewidth v0.0.22 // indirect
+	github.com/mattn/go-isatty v0.0.22 // indirect
+	github.com/mattn/go-runewidth v0.0.24 // indirect
 	github.com/muesli/cancelreader v0.2.2 // indirect
 	github.com/muesli/termenv v0.16.0 // indirect
 	github.com/rivo/uniseg v0.4.7 // indirect
-	github.com/spf13/pflag v1.0.10 // indirect
-	golang.org/x/sync v0.20.0 // indirect
-	golang.org/x/sys v0.42.0 // indirect
-	golang.org/x/text v0.35.0
+	github.com/spf13/pflag v1.0.10
+	golang.org/x/sync v0.21.0 // indirect
+	golang.org/x/sys v0.46.0 // indirect
+	golang.org/x/text v0.38.0
 )
@@ -1,23 +1,23 @@
 charm.land/bubbles/v2 v2.1.0 h1:YSnNh5cPYlYjPxRrzs5VEn3vwhtEn3jVGRBT3M7/I0g=
 charm.land/bubbles/v2 v2.1.0/go.mod h1:l97h4hym2hvWBVfmJDtrEHHCtkIKeTEb3TTJ4ZOB3wY=
-charm.land/bubbletea/v2 v2.0.2 h1:4CRtRnuZOdFDTWSff9r8QFt/9+z6Emubz3aDMnf/dx0=
-charm.land/bubbletea/v2 v2.0.2/go.mod h1:3LRff2U4WIYXy7MTxfbAQ+AdfM3D8Xuvz2wbsOD9OHQ=
-charm.land/fantasy v0.17.1 h1:SQzfnyJPDuQWt6e//KKmQmEEXdqHMC0IZz10XwkLcEM=
-charm.land/fantasy v0.17.1/go.mod h1:FF5ALCCHETacHJPBqU42CtwMInYQ0ul52fdzIHQMbQk=
+charm.land/bubbletea/v2 v2.0.7 h1:7qw2tTAVar7m7klOPBYfTB0mniv/RuexsYwMRNxSeL0=
+charm.land/bubbletea/v2 v2.0.7/go.mod h1:DGW2q8gvzHnOpMpZTORs0aySVHCox5C+2Svk0fci1qs=
+charm.land/fantasy v0.32.0 h1:tlC1qlOdXi2CkF6KB0x8YAAm3hiarI2/69u6pZmOZk8=
+charm.land/fantasy v0.32.0/go.mod h1:CWAFEOB21guhmt4qWN9sOnAHkZzVWjKbhxbPHG+oRs8=
 charm.land/huh/v2 v2.0.3 h1:2cJsMqEPwSywGHvdlKsJyQKPtSJLVnFKyFbsYZTlLkU=
 charm.land/huh/v2 v2.0.3/go.mod h1:93eEveeeqn47MwiC3tf+2atZ2l7Is88rAtmZNZ8x9Wc=
-charm.land/lipgloss/v2 v2.0.2 h1:xFolbF8JdpNkM2cEPTfXEcW1p6NRzOWTSamRfYEw8cs=
-charm.land/lipgloss/v2 v2.0.2/go.mod h1:KjPle2Qd3YmvP1KL5OMHiHysGcNwq6u83MUjYkFvEkM=
+charm.land/lipgloss/v2 v2.0.4 h1:lcPeVtcp23SNra7lHy8iYE4UC2aIipVQ47sbGyyxR5Q=
+charm.land/lipgloss/v2 v2.0.4/go.mod h1:0653x8epbZSzdDfO/XPS1a/uYPOBeSsCssOpJOqDzik=
 cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE=
 cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU=
-cloud.google.com/go/auth v0.19.0 h1:DGYwtbcsGsT1ywuxsIoWi1u/vlks0moIblQHgSDgQkQ=
-cloud.google.com/go/auth v0.19.0/go.mod h1:2Aph7BT2KnaSFOM0JDPyiYgNh6PL9vGMiP8CUIXZ+IY=
+cloud.google.com/go/auth v0.20.0 h1:kXTssoVb4azsVDoUiF8KvxAqrsQcQtB53DcSgta74CA=
+cloud.google.com/go/auth v0.20.0/go.mod h1:942/yi/itH1SsmpyrbnTMDgGfdy2BUqIKyd0cyYLc5Q=
 cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc=
 cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c=
 cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs=
 cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10=
-github.com/Azure/azure-sdk-for-go/sdk/azcore v1.21.0 h1:fou+2+WFTib47nS+nz/ozhEBnvU96bKHy6LjRsY4E28=
-github.com/Azure/azure-sdk-for-go/sdk/azcore v1.21.0/go.mod h1:t76Ruy8AHvUAC8GfMWJMa0ElSbuIcO03NLpynfbgsPA=
+github.com/Azure/azure-sdk-for-go/sdk/azcore v1.22.0 h1:aokoqcHvaGjiM3VpjKDfMMnF/8epJ+Q1HLJ7CudztqE=
+github.com/Azure/azure-sdk-for-go/sdk/azcore v1.22.0/go.mod h1:/WYEx9pcM9Y+Dd/APJaNlSvVSvzl54rrMdZT5+Oi2LM=
 github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 h1:Hk5QBxZQC1jb2Fwj6mpzme37xbCDdNTxU7O9eb5+LB4=
 github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1/go.mod h1:IYus9qsFobWIc2YVwe/WPjcnyCkPKtnHAqUYeebc8z0=
 github.com/Azure/azure-sdk-for-go/sdk/internal v1.12.0 h1:fhqpLE3UEXi9lPaBRpQ6XuRW0nU7hgg4zlmZZa+a9q4=
@@ -28,42 +28,42 @@ github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ
 github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE=
 github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0=
 github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
-github.com/alecthomas/chroma/v2 v2.23.1 h1:nv2AVZdTyClGbVQkIzlDm/rnhk1E9bU9nXwmZ/Vk/iY=
-github.com/alecthomas/chroma/v2 v2.23.1/go.mod h1:NqVhfBR0lte5Ouh3DcthuUCTUpDC9cxBOfyMbMQPs3o=
+github.com/alecthomas/chroma/v2 v2.27.0 h1:FodwmyOBgJULFYmDqibcp9pvfDLWdtPRh9v/r5BXYZs=
+github.com/alecthomas/chroma/v2 v2.27.0/go.mod h1:NjJ3ciIgrqBNeIkWZ4e46nseoLDslxU1LmfCoL+wcY8=
 github.com/alecthomas/repr v0.5.2 h1:SU73FTI9D1P5UNtvseffFSGmdNci/O6RsqzeXJtP0Qs=
 github.com/alecthomas/repr v0.5.2/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
 github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
 github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
-github.com/aws/aws-sdk-go-v2 v1.41.5 h1:dj5kopbwUsVUVFgO4Fi5BIT3t4WyqIDjGKCangnV/yY=
-github.com/aws/aws-sdk-go-v2 v1.41.5/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o=
-github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8 h1:eBMB84YGghSocM7PsjmmPffTa+1FBUeNvGvFou6V/4o=
-github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8/go.mod h1:lyw7GFp3qENLh7kwzf7iMzAxDn+NzjXEAGjKS2UOKqI=
-github.com/aws/aws-sdk-go-v2/config v1.32.14 h1:opVIRo/ZbbI8OIqSOKmpFaY7IwfFUOCCXBsUpJOwDdI=
-github.com/aws/aws-sdk-go-v2/config v1.32.14/go.mod h1:U4/V0uKxh0Tl5sxmCBZ3AecYny4UNlVmObYjKuuaiOo=
-github.com/aws/aws-sdk-go-v2/credentials v1.19.14 h1:n+UcGWAIZHkXzYt87uMFBv/l8THYELoX6gVcUvgl6fI=
-github.com/aws/aws-sdk-go-v2/credentials v1.19.14/go.mod h1:cJKuyWB59Mqi0jM3nFYQRmnHVQIcgoxjEMAbLkpr62w=
-github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21 h1:NUS3K4BTDArQqNu2ih7yeDLaS3bmHD0YndtA6UP884g=
-github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21/go.mod h1:YWNWJQNjKigKY1RHVJCuupeWDrrHjRqHm0N9rdrWzYI=
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21 h1:Rgg6wvjjtX8bNHcvi9OnXWwcE0a2vGpbwmtICOsvcf4=
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21/go.mod h1:A/kJFst/nm//cyqonihbdpQZwiUhhzpqTsdbhDdRF9c=
-github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21 h1:PEgGVtPoB6NTpPrBgqSE5hE/o47Ij9qk/SEZFbUOe9A=
-github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21/go.mod h1:p+hz+PRAYlY3zcpJhPwXlLC4C+kqn70WIHwnzAfs6ps=
-github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6 h1:qYQ4pzQ2Oz6WpQ8T3HvGHnZydA72MnLuFK9tJwmrbHw=
-github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6/go.mod h1:O3h0IK87yXci+kg6flUKzJnWeziQUKciKrLjcatSNcY=
-github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 h1:5EniKhLZe4xzL7a+fU3C2tfUN4nWIqlLesfrjkuPFTY=
-github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7/go.mod h1:x0nZssQ3qZSnIcePWLvcoFisRXJzcTVvYpAAdYX8+GI=
-github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21 h1:c31//R3xgIJMSC8S6hEVq+38DcvUlgFY0FM6mSI5oto=
-github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21/go.mod h1:r6+pf23ouCB718FUxaqzZdbpYFyDtehyZcmP5KL9FkA=
-github.com/aws/aws-sdk-go-v2/service/signin v1.0.9 h1:QKZH0S178gCmFEgst8hN0mCX1KxLgHBKKY/CLqwP8lg=
-github.com/aws/aws-sdk-go-v2/service/signin v1.0.9/go.mod h1:7yuQJoT+OoH8aqIxw9vwF+8KpvLZ8AWmvmUWHsGQZvI=
-github.com/aws/aws-sdk-go-v2/service/sso v1.30.15 h1:lFd1+ZSEYJZYvv9d6kXzhkZu07si3f+GQ1AaYwa2LUM=
-github.com/aws/aws-sdk-go-v2/service/sso v1.30.15/go.mod h1:WSvS1NLr7JaPunCXqpJnWk1Bjo7IxzZXrZi1QQCkuqM=
-github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19 h1:dzztQ1YmfPrxdrOiuZRMF6fuOwWlWpD2StNLTceKpys=
-github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19/go.mod h1:YO8TrYtFdl5w/4vmjL8zaBSsiNp3w0L1FfKVKenZT7w=
-github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 h1:p8ogvvLugcR/zLBXTXrTkj0RYBUdErbMnAFFp12Lm/U=
-github.com/aws/aws-sdk-go-v2/service/sts v1.41.10/go.mod h1:60dv0eZJfeVXfbT1tFJinbHrDfSJ2GZl4Q//OSSNAVw=
-github.com/aws/smithy-go v1.24.3 h1:XgOAaUgx+HhVBoP4v8n6HCQoTRDhoMghKqw4LNHsDNg=
-github.com/aws/smithy-go v1.24.3/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc=
+github.com/aws/aws-sdk-go-v2 v1.42.0 h1:XvXMJTkFQtpBKIWZnmr9ZEOc2InWM2yldjXEJ/bymhA=
+github.com/aws/aws-sdk-go-v2 v1.42.0/go.mod h1:27+ACypSLljLAEKsCYOmrjKh83vuTRkuAe9Uv/3A4bg=
+github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.13 h1:p1BBrg/Hhp6uK7zpejeI8QFXHJeC/mynzi04Sl03k9g=
+github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.13/go.mod h1:8cIfkE9MDhkRZGpQ22aV6/lkYeYSozpz16Smrs5x4Ls=
+github.com/aws/aws-sdk-go-v2/config v1.32.25 h1:ACCejvStYoilgwrfegSt5ZntCbPrk52qfwyNcnl3omM=
+github.com/aws/aws-sdk-go-v2/config v1.32.25/go.mod h1:LJyU8sDRbXUxFn8xMJIGP+v9QYYwveNLI8a/giAOiAs=
+github.com/aws/aws-sdk-go-v2/credentials v1.19.24 h1:2hQqYCV9yqyePQ9o6dCrZc/zO8U3TwPr9mIKlZnPu/I=
+github.com/aws/aws-sdk-go-v2/credentials v1.19.24/go.mod h1:IDwpACtwqHLISdzfwUUNq4P9DsB/h5BLg4FwJPNfqFY=
+github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.29 h1:r6qZHbT+wxgWO/e9vYNUEtg7lv5+UN3pRqKhLXvnArg=
+github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.29/go.mod h1:QRnaRcTVGKPGRy8w78HMQtKUGRYcnMZAANATkeVA6Mo=
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.29 h1:f3vKqSo13fhTYb+JEcXwXefZQE26I1FB5eTSniU67ko=
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.29/go.mod h1:MzoLFUArKGpGD+ukmPiTPG1X5x4o6M2kq4v2dr1FiEc=
+github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.29 h1:RdwIf/CuUsvJX3RgJagbOyotl/cxoLY4xviKuE7p2GY=
+github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.29/go.mod h1:71wt8W2EgswdZy9Mf9KNnzxZ3TiZlv4caKghPktDOkA=
+github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.30 h1:VTGy885W5DKBxWRUJbym9hytNaYzsyaPkCHGRRMAOhU=
+github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.30/go.mod h1:AS0HycUvJRFvTt613AYDOgO2jzw+00cVSMny8XB3yMY=
+github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.12 h1:ZD2+BSw9vFsNlKYIasSNt3uDbjqqXIBcM13UJv/Lx2k=
+github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.12/go.mod h1:Ms4zlcVBbXbiP7EVLhl+lgjvA/a7YphqQ3Ih3174EmI=
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.29 h1:DRebniUGZ2MqiiIVmQJ04vIXr918hubdHMnarSLEWyU=
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.29/go.mod h1:LfRkPCD8YHDM2E5eTkos2UpwYeZnBcVarTa8L59bJHA=
+github.com/aws/aws-sdk-go-v2/service/signin v1.2.0 h1:3nXpRcFwRCW8n7HgO2QGy0Dc20eQNfBuUemGQhpF8m8=
+github.com/aws/aws-sdk-go-v2/service/signin v1.2.0/go.mod h1:LxYujSTLPRlp2vTtcUO/+1ilrew8ytt6SvQyOgejzFQ=
+github.com/aws/aws-sdk-go-v2/service/sso v1.31.3 h1:ey1XLTYXb9PcLt4535632o5kCGXNXEhNb620Dqwuylo=
+github.com/aws/aws-sdk-go-v2/service/sso v1.31.3/go.mod h1:Lk7PlmoTYryQmyBG0EXqj5BcUbj3whXdU2s3yGI3EAc=
+github.com/aws/aws-sdk-go-v2/service/ssooidc v1.36.6 h1:yLr03zQE/5Eu5l3QU0Si+xMbLMbSDF2YXsigqXngs6g=
+github.com/aws/aws-sdk-go-v2/service/ssooidc v1.36.6/go.mod h1:Q5N6icH+KJZDLh+ESNwzdv6cZ6vLFF/egy3IOxWhmz4=
+github.com/aws/aws-sdk-go-v2/service/sts v1.43.3 h1:VrIhKRCSK1umelSgB9RghvA9RTUYeQffyAS5ApXehNI=
+github.com/aws/aws-sdk-go-v2/service/sts v1.43.3/go.mod h1:r8wkDOuLaaMFqFiYAb8dGY2A3gJCOujMc6CFOVC4Zhc=
+github.com/aws/smithy-go v1.27.2 h1:y9NPmSE6am6LjEFPfqHqG/jJk7AauQvhCJONKh7kpzk=
+github.com/aws/smithy-go v1.27.2/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc=
 github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
 github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
 github.com/aymanbagabas/go-udiff v0.4.1 h1:OEIrQ8maEeDBXQDoGCbbTTXYJMYRCRO1fnodZ12Gv5o=
@@ -84,26 +84,28 @@ github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834 h1:ZR7e0r
 github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834/go.mod h1:aKC/t2arECF6rNOnaKaVU6y4t4ZeHQzqfxedE/VkVhA=
 github.com/charmbracelet/log v1.0.0 h1:HVVVMmfOorfj3BA9i8X8UL69Hoz9lI0PYwXfJvOdRc4=
 github.com/charmbracelet/log v1.0.0/go.mod h1:uYgY3SmLpwJWxmlrPwXvzVYujxis1vAKRV/0VQB7yWA=
-github.com/charmbracelet/openai-go v0.0.0-20260319145158-d0740cc34266 h1:BW/sZtyd1JyYy0h5adMm3tzpNyL857LWjuTRET6OhpY=
-github.com/charmbracelet/openai-go v0.0.0-20260319145158-d0740cc34266/go.mod h1:1DahUaExbUZx/jD+FNT2PKP4L9rLE5+ZBRuI8mZjd/E=
-github.com/charmbracelet/ultraviolet v0.0.0-20260330092749-0f94982c930b h1:ASDO9RT6SNKTQN87jO2bRfxHFJq8cgeYdFzivY2gCeM=
-github.com/charmbracelet/ultraviolet v0.0.0-20260330092749-0f94982c930b/go.mod h1:Vo8TffMf0q7Uho/n8e6XpBZvOWtd3g39yX+9P5rRutA=
-github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8=
-github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ=
+github.com/charmbracelet/openai-go v0.0.0-20260617131321-5e4b9c18c4be h1:pg+OWlIkk9HOe/8P5J95aKe2wGDzFUiiyFOUpwR30B4=
+github.com/charmbracelet/openai-go v0.0.0-20260617131321-5e4b9c18c4be/go.mod h1:1DahUaExbUZx/jD+FNT2PKP4L9rLE5+ZBRuI8mZjd/E=
+github.com/charmbracelet/ultraviolet v0.0.0-20260615092913-2399af76d5b1 h1:4+r3uOJ69ueRBt4okgEfWZeXs3BD36HcDBmOIAUlETk=
+github.com/charmbracelet/ultraviolet v0.0.0-20260615092913-2399af76d5b1/go.mod h1:f/jRa757WUmaOZrbPspXymbg/GnbF+rwe4OLsG7aXYo=
+github.com/charmbracelet/x/ansi v0.11.7 h1:kzv1kJvjg2S3r9KHo8hDdHFQLEqn4RBCb39dAYC84jI=
+github.com/charmbracelet/x/ansi v0.11.7/go.mod h1:9qGpnAVYz+8ACONkZBUWPtL7lulP9No6p1epAihUZwQ=
 github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
 github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
 github.com/charmbracelet/x/conpty v0.1.1 h1:s1bUxjoi7EpqiXysVtC+a8RrvPPNcNvAjfi4jxsAuEs=
 github.com/charmbracelet/x/conpty v0.1.1/go.mod h1:OmtR77VODEFbiTzGE9G1XiRJAga6011PIm4u5fTNZpk=
+github.com/charmbracelet/x/editor v0.2.0 h1:7XLUKtaRaB8jN7bWU2p2UChiySyaAuIfYiIRg8gGWwk=
+github.com/charmbracelet/x/editor v0.2.0/go.mod h1:p3oQ28TSL3YPd+GKJ1fHWcp+7bVGpedHpXmo0D6t1dY=
 github.com/charmbracelet/x/errors v0.0.0-20240508181413-e8d8b6e2de86 h1:JSt3B+U9iqk37QUU2Rvb6DSBYRLtWqFqfxf8l5hOZUA=
 github.com/charmbracelet/x/errors v0.0.0-20240508181413-e8d8b6e2de86/go.mod h1:2P0UgXMEa6TsToMSuFqKFQR+fZTO9CNGUNokkPatT/0=
-github.com/charmbracelet/x/exp/charmtone v0.0.0-20260330094520-2dce04b6f8a4 h1:pIj18ZCZO4WOVj7jwjLoUb1lC7rS/I8oC3fZWXugNaY=
-github.com/charmbracelet/x/exp/charmtone v0.0.0-20260330094520-2dce04b6f8a4/go.mod h1:nsExn0DGyX0lh9LwLHTn2Gg+hafdzfSXnC+QmEJTZFY=
+github.com/charmbracelet/x/exp/charmtone v0.0.0-20260615092313-b57e5e6d29bb h1:hoqNT54vrpXamSaQe5GxupakGgvvqFmVgmLJjotpHco=
+github.com/charmbracelet/x/exp/charmtone v0.0.0-20260615092313-b57e5e6d29bb/go.mod h1:nsExn0DGyX0lh9LwLHTn2Gg+hafdzfSXnC+QmEJTZFY=
 github.com/charmbracelet/x/exp/golden v0.0.0-20250806222409-83e3a29d542f h1:pk6gmGpCE7F3FcjaOEKYriCvpmIN4+6OS/RD0vm4uIA=
 github.com/charmbracelet/x/exp/golden v0.0.0-20250806222409-83e3a29d542f/go.mod h1:IfZAMTHB6XkZSeXUqriemErjAWCCzT0LwjKFYCZyw0I=
 github.com/charmbracelet/x/exp/ordered v0.1.0 h1:55/qLwjIh0gL0Vni+QAWk7T/qRVP6sBf+2agPBgnOFE=
 github.com/charmbracelet/x/exp/ordered v0.1.0/go.mod h1:5UHwmG+is5THxMyCJHNPCn2/ecI07aKNrW+LcResjJ8=
-github.com/charmbracelet/x/exp/slice v0.0.0-20260330094520-2dce04b6f8a4 h1:VSd4zShIAf/4FgEDFJpapEcAPrc7h3dyyN7V9JlJpQw=
-github.com/charmbracelet/x/exp/slice v0.0.0-20260330094520-2dce04b6f8a4/go.mod h1:vqEfX6xzqW1pKKZUUiFOKg0OQ7bCh54Q2vR/tserrRA=
+github.com/charmbracelet/x/exp/slice v0.0.0-20260615092313-b57e5e6d29bb h1:fr6DwrfJB2XQ3zM2fCwumXPE5G+hegnkEpl1KUuPsQI=
+github.com/charmbracelet/x/exp/slice v0.0.0-20260615092313-b57e5e6d29bb/go.mod h1:vqEfX6xzqW1pKKZUUiFOKg0OQ7bCh54Q2vR/tserrRA=
 github.com/charmbracelet/x/exp/strings v0.1.0 h1:i69S2XI7uG1u4NLGeJPSYU++Nmjvpo9nwd6aoEm7gkA=
 github.com/charmbracelet/x/exp/strings v0.1.0/go.mod h1:/ehtMPNh9K4odGFkqYJKpIYyePhdp1hLBRvyY4bWkH8=
 github.com/charmbracelet/x/json v0.2.0 h1:DqB+ZGx2h+Z+1s98HOuOyli+i97wsFQIxP2ZQANTPrQ=
@@ -122,15 +124,17 @@ github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJ
 github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM=
 github.com/cncf/xds/go v0.0.0-20260202195803-dba9d589def2 h1:aBangftG7EVZoUb69Os8IaYg++6uMOdKK83QtkkvJik=
 github.com/cncf/xds/go v0.0.0-20260202195803-dba9d589def2/go.mod h1:qwXFYgsP6T7XnJtbKlf1HP8AjxZZyzxMmc+Lq5GjlU4=
-github.com/coder/acp-go-sdk v0.6.3 h1:LsXQytehdjKIYJnoVWON/nf7mqbiarnyuyE3rrjBsXQ=
-github.com/coder/acp-go-sdk v0.6.3/go.mod h1:yKzM/3R9uELp4+nBAwwtkS0aN1FOFjo11CNPy37yFko=
+github.com/coder/acp-go-sdk v0.13.5 h1:LI9jq5xon7xslaYlnoktvTVyDlE37yIk2daT7N9ASYk=
+github.com/coder/acp-go-sdk v0.13.5/go.mod h1:yKzM/3R9uELp4+nBAwwtkS0aN1FOFjo11CNPy37yFko=
 github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
 github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
 github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
-github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/dlclark/regexp2 v1.12.0 h1:0j4c5qQmnC6XOWNjP3PIXURXN2gWx76rd3KvgdPkCz8=
+github.com/dlclark/regexp2 v1.12.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/dlclark/regexp2/v2 v2.2.2 h1:MYWvNYw8okuqNhwTYO587EZMiDruVa2vhV6fsGpfya0=
+github.com/dlclark/regexp2/v2 v2.2.2/go.mod h1:avUrQvPaLz2DrFNHJF0taWAFFX2C1GMSSoeiqFjcBmU=
 github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI=
 github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ=
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
@@ -140,14 +144,14 @@ github.com/envoyproxy/go-control-plane/envoy v1.37.0 h1:u3riX6BoYRfF4Dr7dwSOroNf
 github.com/envoyproxy/go-control-plane/envoy v1.37.0/go.mod h1:DReE9MMrmecPy+YvQOAOHNYMALuowAnbjjEMkkWOi6A=
 github.com/envoyproxy/protoc-gen-validate v1.3.3 h1:MVQghNeW+LZcmXe7SY1V36Z+WFMDjpqGAGacLe2T0ds=
 github.com/envoyproxy/protoc-gen-validate v1.3.3/go.mod h1:TsndJ/ngyIdQRhMcVVGDDHINPLWB7C82oDArY51KfB0=
-github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
-github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/felixge/httpsnoop v1.1.0 h1:3YtUj32ZZkqZtt3sZZsClsymw/QDuVfpNhoA31zeORc=
+github.com/felixge/httpsnoop v1.1.0/go.mod h1:Zqxgdd+1Rkcz8euOqdr7lqgCRJztwr5hp9vDSi5UZCE=
 github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
 github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
-github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
-github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
-github.com/go-json-experiment/json v0.0.0-20260214004413-d219187c3433 h1:vymEbVwYFP/L05h5TKQxvkXoKxNvTpjxYKdF1Nlwuao=
-github.com/go-json-experiment/json v0.0.0-20260214004413-d219187c3433/go.mod h1:tphK2c80bpPhMOI4v6bIc2xWywPfbqi1Z06+RcrMkDg=
+github.com/fsnotify/fsnotify v1.10.1 h1:b0/UzAf9yR5rhf3RPm9gf3ehBPpf0oZKIjtpKrx59Ho=
+github.com/fsnotify/fsnotify v1.10.1/go.mod h1:TLheqan6HD6GBK6PrDWyDPBaEV8LspOxvPSjC+bVfgo=
+github.com/go-json-experiment/json v0.0.0-20260601182631-00ed12fed2a6 h1:nxP4pPoyqOAgX8lYDFCfl3DyKeXErCvSvhcyzwGV9CE=
+github.com/go-json-experiment/json v0.0.0-20260601182631-00ed12fed2a6/go.mod h1:tphK2c80bpPhMOI4v6bIc2xWywPfbqi1Z06+RcrMkDg=
 github.com/go-logfmt/logfmt v0.6.1 h1:4hvbpePJKnIzH1B+8OR/JPbTx37NktoI9LE2QZBBkvE=
 github.com/go-logfmt/logfmt v0.6.1/go.mod h1:EV2pOAQoZaT1ZXZbqDl5hrymndi4SY9ED9/z6CO0XAk=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
@@ -165,16 +169,16 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
-github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8=
-github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
+github.com/google/jsonschema-go v0.4.3 h1:/DBOLZTfDow7pe2GmaJNhltueGTtDKICi8V8p+DQPd0=
+github.com/google/jsonschema-go v0.4.3/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
 github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
 github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/googleapis/enterprise-certificate-proxy v0.3.14 h1:yh8ncqsbUY4shRD5dA6RlzjJaT4hi3kII+zYw8wmLb8=
-github.com/googleapis/enterprise-certificate-proxy v0.3.14/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg=
-github.com/googleapis/gax-go/v2 v2.21.0 h1:h45NjjzEO3faG9Lg/cFrBh2PgegVVgzqKzuZl/wMbiI=
-github.com/googleapis/gax-go/v2 v2.21.0/go.mod h1:But/NJU6TnZsrLai/xBAQLLz+Hc7fHZJt/hsCz3Fih4=
+github.com/googleapis/enterprise-certificate-proxy v0.3.16 h1:F/VPrx0YPBdksZJQdCAp0WUsqnNmZpUZszzfYt0M5Dw=
+github.com/googleapis/enterprise-certificate-proxy v0.3.16/go.mod h1:9Yb0eAkH/Xqhvv3zbeKf/+wMJqCeocWc6KIhDvEAuYE=
+github.com/googleapis/gax-go/v2 v2.22.0 h1:PjIWBpgGIVKGoCXuiCoP64altEJCj3/Ei+kSU5vlZD4=
+github.com/googleapis/gax-go/v2 v2.22.0/go.mod h1:irWBbALSr0Sk3qlqb9SyJ1h68WjgeFuiOzI4Rqw5+aY=
 github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
 github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
@@ -185,14 +189,10 @@ github.com/indaco/herald v0.13.0 h1:+xVG9Fx5NpuWhwku/9IlRL6I009NnX4VUGKvlZHTRxU=
 github.com/indaco/herald v0.13.0/go.mod h1:T5g1+XLYvpjouhzAGHnAHDCKizhESkoV6+QPZ3DhgWA=
 github.com/indaco/herald-md v0.3.0 h1:hN1cKyrexPPM9PeHBsKuaWvIizSi/iYvM9yzRgtdb8M=
 github.com/indaco/herald-md v0.3.0/go.mod h1:RUHVaDSG45ymJjKyxpDwBocLXrZo93FB4OeYMsw9B9s=
-github.com/kaptinlin/go-i18n v0.3.0 h1:wP76dvYg04bvwTb+8NB+CmdZ2kL7lSSCQ9B/kFv7QHo=
-github.com/kaptinlin/go-i18n v0.3.0/go.mod h1:pVcu9qsW5pOIOoZFJXesRYmLos1vMQrby70JPAoWmJU=
-github.com/kaptinlin/jsonpointer v0.4.17 h1:mY9k8ciWncxbsECyaxKnR0MdmxamNdp2tLQkAKVrtSk=
-github.com/kaptinlin/jsonpointer v0.4.17/go.mod h1:SsfsjqnHG5zuKo1DTBzk1VknaHlL4osHw+X9kZKukpU=
-github.com/kaptinlin/jsonschema v0.7.7 h1:41BlQJ9dskH0oE5DSzBUrl/w4JQYIr6N6L0B5GNyDoM=
-github.com/kaptinlin/jsonschema v0.7.7/go.mod h1:rKjWfyySHSxAD7Li2ctYkPlOu960igoKBvZ2ADRtd5Q=
-github.com/kaptinlin/messageformat-go v0.4.19 h1:A5kuuZ1ybXDQ7kD1aoEWGAOemX7hLsMY0yolgSbgpRI=
-github.com/kaptinlin/messageformat-go v0.4.19/go.mod h1:utSDTfiXTxl66OC5RIEuObLH7Ue3YjbA2X86SYMBYWg=
+github.com/kaptinlin/jsonpointer v0.4.26 h1:tw616yszHek+B3/GtDSia+uzBa3sLXGpmo4tYeMhBZw=
+github.com/kaptinlin/jsonpointer v0.4.26/go.mod h1:wVOBaXGGnP42YsMb6zev/3W5POTvspdNfh8DXzf8XS8=
+github.com/kaptinlin/jsonschema v0.8.1 h1:Krhuq1HpE+olHoPfcxkohqKKCnXfixUPv+aUYRegBBQ=
+github.com/kaptinlin/jsonschema v0.8.1/go.mod h1:mCH2W5lXd29tdDjvoFfY32nedPORnlk7pCVrrcs/NkQ=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
@@ -201,12 +201,12 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0
 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
 github.com/lucasb-eyer/go-colorful v1.4.0 h1:UtrWVfLdarDgc44HcS7pYloGHJUjHV/4FwW4TvVgFr4=
 github.com/lucasb-eyer/go-colorful v1.4.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
-github.com/mark3labs/mcp-go v0.47.0 h1:h44yeM3DduDyQgzImYWu4pt6VRkqP/0p/95AGhWngnA=
-github.com/mark3labs/mcp-go v0.47.0/go.mod h1:JKTC7R2LLVagkEWK7Kwu7DbmA6iIvnNAod6yrHiQMag=
-github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
-github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/mattn/go-runewidth v0.0.22 h1:76lXsPn6FyHtTY+jt2fTTvsMUCZq1k0qwRsAMuxzKAk=
-github.com/mattn/go-runewidth v0.0.22/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
+github.com/mark3labs/mcp-go v0.55.0 h1:lJfz2aoctiwK+sI991+uIYwmKNIBciI+O7zsyDsa4U8=
+github.com/mark3labs/mcp-go v0.55.0/go.mod h1:+8WclSK1ZUweCP3hvktSji8n8ABG/95QaEkeVE/Uwas=
+github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw4=
+github.com/mattn/go-isatty v0.0.22/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4=
+github.com/mattn/go-runewidth v0.0.24 h1:cpokDiIn0MGnhdHwuWnJBITySJ20QyNGnY2kR/ay2DU=
+github.com/mattn/go-runewidth v0.0.24/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
 github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4=
 github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE=
 github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
@@ -221,8 +221,8 @@ github.com/muesli/roff v0.1.0 h1:YD0lalCotmYuF5HhZliKWlIx7IEhiXeSfq7hNjFqGF8=
 github.com/muesli/roff v0.1.0/go.mod h1:pjAHQM9hdUUwm/krAfrLGgJkXJ+YuhtsfZ42kieB2Ig=
 github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
 github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
-github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM=
-github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
+github.com/pelletier/go-toml/v2 v2.4.0 h1:Mwu0mAkUKbittDs3/ADDWXqMmq3EOK2VHiuCkV00Row=
+github.com/pelletier/go-toml/v2 v2.4.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
 github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
 github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
 github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
@@ -236,6 +236,8 @@ github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/sagikazarmark/locafero v0.12.0 h1:/NQhBAkUb4+fH1jivKHWusDYFjMOOKU88eegjfxfHb4=
 github.com/sagikazarmark/locafero v0.12.0/go.mod h1:sZh36u/YSZ918v0Io+U9ogLYQJ9tLLBmM4eneO6WwsI=
+github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ=
+github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU=
 github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I=
 github.com/spf13/afero v1.15.0/go.mod h1:NC2ByUVxtQs4b3sIUphxK0NioZnmxgyCrfzeuq8lxMg=
 github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
@@ -252,8 +254,8 @@ github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD
 github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
 github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
 github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
-github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
-github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.19.0 h1:xwxm7n691Uf3u5OFjzngavjGTh55KX5q/9w9xHW88JU=
+github.com/tidwall/gjson v1.19.0/go.mod h1:V37/opeE/JbLUOfH0QTXiNez2l0RUjYUhpT4szFQAfc=
 github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
 github.com/tidwall/match v1.2.0 h1:0pt8FlkOwjN2fPt4bIl4BoNxb98gGHN2ObFEDkrfZnM=
 github.com/tidwall/match v1.2.0/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
@@ -272,55 +274,56 @@ github.com/yuin/goldmark v1.8.2 h1:kEGpgqJXdgbkhcOgBxkC0X0PmoPG1ZyoZ117rDVp4zE=
 github.com/yuin/goldmark v1.8.2/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg=
 go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
 go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
-go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0 h1:yI1/OhfEPy7J9eoa6Sj051C7n5dvpj0QX8g4sRchg04=
-go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0/go.mod h1:NoUCKYWK+3ecatC4HjkRktREheMeEtrXoQxrqYFeHSc=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 h1:OyrsyzuttWTSur2qN/Lm0m2a8yqyIjUVBZcxFPuXq2o=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0/go.mod h1:C2NGBr+kAB4bk3xtMXfZ94gqFDtg/GkI7e9zqGh5Beg=
-go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
-go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
-go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM=
-go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY=
-go.opentelemetry.io/otel/sdk v1.42.0 h1:LyC8+jqk6UJwdrI/8VydAq/hvkFKNHZVIWuslJXYsDo=
-go.opentelemetry.io/otel/sdk v1.42.0/go.mod h1:rGHCAxd9DAph0joO4W6OPwxjNTYWghRWmkHuGbayMts=
-go.opentelemetry.io/otel/sdk/metric v1.42.0 h1:D/1QR46Clz6ajyZ3G8SgNlTJKBdGp84q9RKCAZ3YGuA=
-go.opentelemetry.io/otel/sdk/metric v1.42.0/go.mod h1:Ua6AAlDKdZ7tdvaQKfSmnFTdHx37+J4ba8MwVCYM5hc=
-go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A=
-go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.69.0 h1:2yEATaop1/a1I4psnSLgWVPLWwCzkqWakgJy7xTDVy0=
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.69.0/go.mod h1:D7J12YRapIekYyPWgGPlA/23pRmpSEZC5xJC/TTLI9U=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.69.0 h1:8tvICD4vSTOOsNrsI4Ljf6C+6UKvpTEH5XY3JMoyPoo=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.69.0/go.mod h1:z9+yiacE0IHRqM4qFfkbt/JYlmYXgss8GY/jXoNuPJI=
+go.opentelemetry.io/otel v1.44.0 h1:JjwHmHpA4iZ3wBxluu2fbbE7j4kqlE8jXyAyPXH7HqU=
+go.opentelemetry.io/otel v1.44.0/go.mod h1:BMgjTHL9WPRlRjL2oZCBTL4whCGtXch2H4BhOPIAyYc=
+go.opentelemetry.io/otel/metric v1.44.0 h1:1w0gILTcHdr3YI+ixLyjemwrVnsMURbTZFrSYCdDdmc=
+go.opentelemetry.io/otel/metric v1.44.0/go.mod h1:8O7hanEPBNgEMmybD3s2VBKcgWOCsA6tzHBPODAiquo=
+go.opentelemetry.io/otel/sdk v1.44.0 h1:nHYwb9lK+fJPU/dnT6s7W7Z8itMWyqrnVfbheVYrZ58=
+go.opentelemetry.io/otel/sdk v1.44.0/go.mod h1:Osuydd3Se74nqjAKxid74N5eC+jfEqfTegHRnq58oK0=
+go.opentelemetry.io/otel/sdk/metric v1.44.0 h1:3LlKgI+VjbVsjNRFZJZAJ30WjXC5VkNRks6si09iEfI=
+go.opentelemetry.io/otel/sdk/metric v1.44.0/go.mod h1:5B5pMARnXxKhltooO4xUuCBorl65a4EpnTalObqOigA=
+go.opentelemetry.io/otel/trace v1.44.0 h1:jxF5CsGYCe74MCRx2X4g7WsY/VBKRqqpNvXlX/6gtIk=
+go.opentelemetry.io/otel/trace v1.44.0/go.mod h1:oLl1jrMQAVo6v3GAggN+1VH9VIz9iUSvW53sW1Q8PIE=
 go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
 go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
-golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
-golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
-golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 h1:jiDhWWeC7jfWqR9c/uplMOqJ0sbNlNWv0UkzE0vX1MA=
-golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90/go.mod h1:xE1HEv6b+1SCZ5/uscMRjUBKtIxworgEcEi+/n9NQDQ=
-golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
-golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
+golang.org/x/crypto v0.53.0 h1:QZ4Muo8THX6CizN2vPPd5fBGHyogrdK9fG4wLPFUsto=
+golang.org/x/crypto v0.53.0/go.mod h1:DNLU434OwVakk9PzuwV8w62mAJpRJL3vsgcfp4Qnsio=
+golang.org/x/exp v0.0.0-20260611194520-c48552f49976 h1:X8Hz2ImujgbmetVuW+w2YkyZChE3cBpZi2P158rTG9M=
+golang.org/x/exp v0.0.0-20260611194520-c48552f49976/go.mod h1:vnf4pv9iKZXY58sQE1L86zmNWJ4159e1RkcWiLCkeEY=
+golang.org/x/image v0.42.0 h1:1gSs6ehNWXLbkHBIPcWztk3D/6aIA/8hauiAYtlodVY=
+golang.org/x/image v0.42.0/go.mod h1:rrpelvGFt+kLPAjPM4HeWPgrl0FtafueU//e5N0qk/Q=
+golang.org/x/net v0.56.0 h1:Rw8j/hFzGvJUZwNBXnAtf5sVDVt+65SK2C7IxCxZt5o=
+golang.org/x/net v0.56.0/go.mod h1:D3Ku6r+V6JROoZK144D2XfMHFcMq/0zSfLelVTCFKec=
 golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs=
 golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q=
-golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
-golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
-golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
-golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
-golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU=
-golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A=
-golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
-golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
+golang.org/x/sync v0.21.0 h1:HLII4xRRTtCRkxYp4HNFF0Js/Og6q2i++KXbg0gHCwM=
+golang.org/x/sync v0.21.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
+golang.org/x/sys v0.46.0 h1:noSf2Fq6F8DBgS+LysIkx7rIExoNHJsxOAtPp4rthXw=
+golang.org/x/sys v0.46.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+golang.org/x/term v0.44.0 h1:0rLvDRCtNj0gZkyIXhCyOb2OAzEhLVqc4B+hrsBhrmc=
+golang.org/x/term v0.44.0/go.mod h1:7ze4MdzUzLXpSAoFP1H0bOI9aXDqveSvatT5vKcFh2Y=
+golang.org/x/text v0.38.0 h1:sXmwo9DwP3OK9EZ7PqAdaooSGozfl/3a6/xJcbzPRhE=
+golang.org/x/text v0.38.0/go.mod h1:YXZt3QhHUKYT53r2lLKFIVi6Ao1jdzrTR/KQ09qyxF4=
 golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
 golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
 gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4=
 gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E=
-google.golang.org/api v0.274.0 h1:aYhycS5QQCwxHLwfEHRRLf9yNsfvp1JadKKWBE54RFA=
-google.golang.org/api v0.274.0/go.mod h1:JbAt7mF+XVmWu6xNP8/+CTiGH30ofmCmk9nM8d8fHew=
-google.golang.org/genai v1.52.1 h1:dYoljKtLDXMiBdVaClSJ/ZPwZ7j1N0lGjMhwOKOQUlk=
-google.golang.org/genai v1.52.1/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5gw/gmk=
-google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 h1:XzmzkmB14QhVhgnawEVsOn6OFsnpyxNPRY9QV01dNB0=
-google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7/go.mod h1:L43LFes82YgSonw6iTXTxXUX1OlULt4AQtkik4ULL/I=
-google.golang.org/genproto/googleapis/api v0.0.0-20260319201613-d00831a3d3e7 h1:41r6JMbpzBMen0R/4TZeeAmGXSJC7DftGINUodzTkPI=
-google.golang.org/genproto/googleapis/api v0.0.0-20260319201613-d00831a3d3e7/go.mod h1:EIQZ5bFCfRQDV4MhRle7+OgjNtZ6P1PiZBgAKuxXu/Y=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8=
-google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM=
-google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4=
+google.golang.org/api v0.285.0 h1:B7eHHoKGAX/LrPkQvhQqnGwjgWxofbdGwCTQvpm8FkM=
+google.golang.org/api v0.285.0/go.mod h1:NlOlUIr8MPoIhT9Bb/oUnRuHbJOLwxb6JSYJM8Yz+jQ=
+google.golang.org/genai v1.61.0 h1:wCyNGiaC9q5A59B80zuEtNBhq3ypEvICFkZYOfK7IO0=
+google.golang.org/genai v1.61.0/go.mod h1:mDdPDFXo1Ats7f1WXVyZgWb/CkMzFWTWJruIMy7hGIU=
+google.golang.org/genproto v0.0.0-20260610212136-7ab31c22f7ad h1:cYL1DPJAQr4JMvhfGao0PDXoaf03ifMljAuDyrbMBd0=
+google.golang.org/genproto v0.0.0-20260610212136-7ab31c22f7ad/go.mod h1:cVHIikDNAdx8ISZeW+2rYkEMf3xn0GSaBYmVnWXQBUo=
+google.golang.org/genproto/googleapis/api v0.0.0-20260610212136-7ab31c22f7ad h1:3iLyITS/sySRwbUKoC7ogfj2Yr1Cjs0pfaRKj5U5HEw=
+google.golang.org/genproto/googleapis/api v0.0.0-20260610212136-7ab31c22f7ad/go.mod h1:KdNqO+rCIWgFumrNBSEDlDNrkrQnpkax7Tv1WxNY8V4=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20260615183401-62b3387ff324 h1:9HZDLIdYBJXAnaFOr9WHrKVycfpY+75s9HGadC0305A=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20260615183401-62b3387ff324/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8=
+google.golang.org/grpc v1.81.1 h1:VnnIIZ88UzOOKLukQi+ImGz8O1Wdp8nAGGnvOfEIWQQ=
+google.golang.org/grpc v1.81.1/go.mod h1:xGH9GfzOyMTGIOXBJmXt+BX/V0kcdQbdcuwQ/zNw42I=
 google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
 google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
@@ -23,18 +23,6 @@ import (
 // Version is injected at build time; fallback to "dev".
 var Version = "dev"

-// thinkingTagOpen and thinkingTagClose are the XML-style tags that some models
-// (Qwen, DeepSeek) wrap reasoning content in. We parse these to extract
-// reasoning/thinking content and send it as ACP thought updates.
-// Also support <think> format used by some models.
-const (
-	thinkingTagOpen    = "<thinking>"
-	thinkingTagClose   = "</thinking>"
-	shortThinkTagOpen  = "<think>"
-	shortThinkTagClose = "</think>"
-)
-
-// Agent implements the acp.Agent interface, delegating to Kit for LLM
 // execution, tool calls, and session management.
 type Agent struct {
 	conn     *acp.AgentSideConnection
@@ -42,10 +30,6 @@ type Agent struct {

 	// toolCallCounter provides unique IDs for tool calls within a turn.
 	toolCallCounter atomic.Int64
-
-	// inThinkingTag tracks whether we're currently inside a <thinking> tag
-	// when parsing streaming content from models that wrap reasoning in XML tags.
-	inThinkingTag bool
 }

 // NewAgent creates a new ACP agent backed by Kit.
@@ -77,6 +61,12 @@ func (a *Agent) Authenticate(_ context.Context, _ acp.AuthenticateRequest) (acp.
 	return acp.AuthenticateResponse{}, nil
 }

+// Logout handles ACP logout requests. Kit doesn't require auth for local
+// stdio usage, so there is nothing to clear and the call always succeeds.
+func (a *Agent) Logout(_ context.Context, _ acp.LogoutRequest) (acp.LogoutResponse, error) {
+	return acp.LogoutResponse{}, nil
+}
+
 // Initialize negotiates capabilities with the ACP client.
 func (a *Agent) Initialize(_ context.Context, params acp.InitializeRequest) (acp.InitializeResponse, error) {
 	log.Debug("acp: initialize", "protocol_version", params.ProtocolVersion)
@@ -144,9 +134,6 @@ func (a *Agent) Prompt(ctx context.Context, params acp.PromptRequest) (acp.Promp

 	log.Debug("acp: prompt", "session", sessionID, "prompt_len", len(promptText), "files", len(files))

-	// Reset thinking tag state for this new prompt turn
-	a.inThinkingTag = false
-
 	// Create a cancellable context for this prompt turn.
 	promptCtx, cancel := context.WithCancel(ctx)
 	sess.setCancel(cancel)
@@ -196,22 +183,75 @@ func (a *Agent) SetSessionMode(_ context.Context, _ acp.SetSessionModeRequest) (
 	return acp.SetSessionModeResponse{}, nil
 }

-// SetSessionModel changes the active model for a session.
-func (a *Agent) SetSessionModel(ctx context.Context, params acp.SetSessionModelRequest) (acp.SetSessionModelResponse, error) {
+// ListSessions always returns an empty session list, even while sessions are
+// active: Kit doesn't persist sessions across restarts in ACP mode.
+func (a *Agent) ListSessions(_ context.Context, _ acp.ListSessionsRequest) (acp.ListSessionsResponse, error) {
+	return acp.ListSessionsResponse{
+		Sessions: []acp.SessionInfo{}, // empty rather than nil; presumably so it encodes as [] and not null
+	}, nil
+}
+
+// CloseSession cancels the session's in-flight prompt and removes it from the registry; unknown IDs succeed.
+func (a *Agent) CloseSession(_ context.Context, params acp.CloseSessionRequest) (acp.CloseSessionResponse, error) {
 	sessionID := string(params.SessionId)
 	sess, ok := a.registry.get(sessionID)
 	if !ok {
-		return acp.SetSessionModelResponse{}, acp.NewInvalidParams(fmt.Sprintf("session not found: %s", sessionID))
+		return acp.CloseSessionResponse{}, nil // session not in the registry: nothing to cancel or remove
 	}
 
-	modelID := string(params.ModelId)
-	log.Debug("acp: set_session_model", "session", sessionID, "model", modelID)
+	log.Debug("acp: close session", "session", sessionID)
+	sess.cancelPrompt()
+	a.registry.remove(sessionID)
+	return acp.CloseSessionResponse{}, nil
+}

-	if err := sess.kit.SetModel(ctx, modelID); err != nil {
-		return acp.SetSessionModelResponse{}, fmt.Errorf("set model: %w", err)
+// ResumeSession always fails: Kit doesn't persist sessions across restarts
+// in ACP mode, so there is nothing to resume. Clients should use NewSession.
+func (a *Agent) ResumeSession(_ context.Context, _ acp.ResumeSessionRequest) (acp.ResumeSessionResponse, error) {
+	return acp.ResumeSessionResponse{}, fmt.Errorf("resume session not supported")
+}
+
+// SetSessionConfigOption handles session configuration changes. Only the "model"
+// option has an effect; boolean and unknown options are logged and acknowledged.
+func (a *Agent) SetSessionConfigOption(ctx context.Context, params acp.SetSessionConfigOptionRequest) (acp.SetSessionConfigOptionResponse, error) {
+	// Pull session ID, config ID and value out of whichever variant is set.
+	var sessionID string
+	var configID string
+	var value string
+
+	switch {
+	case params.ValueId != nil:
+		sessionID = string(params.ValueId.SessionId)
+		configID = string(params.ValueId.ConfigId)
+		value = string(params.ValueId.Value)
+	case params.Boolean != nil:
+		sessionID = string(params.Boolean.SessionId)
+		configID = string(params.Boolean.ConfigId)
+		// No boolean option is acted on: acknowledge it without a session lookup.
+		log.Debug("acp: set_session_config_option (boolean)", "session", sessionID, "config", configID, "value", params.Boolean.Value)
+		return acp.SetSessionConfigOptionResponse{}, nil
+	default:
+		return acp.SetSessionConfigOptionResponse{}, acp.NewInvalidParams("unsupported config option variant")
 	}
 
-	return acp.SetSessionModelResponse{}, nil
+	sess, ok := a.registry.get(sessionID)
+	if !ok {
+		return acp.SetSessionConfigOptionResponse{}, acp.NewInvalidParams(fmt.Sprintf("session not found: %s", sessionID))
+	}
+
+	log.Debug("acp: set_session_config_option", "session", sessionID, "config", configID, "value", value)
+
+	// Apply known options; unknown IDs are only logged and still return success.
+	switch configID {
+	case "model":
+		if err := sess.kit.SetModel(ctx, value); err != nil {
+			return acp.SetSessionConfigOptionResponse{}, fmt.Errorf("set model: %w", err)
+		}
+	default:
+		log.Debug("acp: unknown config option", "config", configID)
+	}
+
+	return acp.SetSessionConfigOptionResponse{}, nil
 }

 // ---------------------------------------------------------------------------
@@ -230,24 +270,8 @@ func (a *Agent) subscribeEvents(ctx context.Context, k *kit.Kit, sessionID acp.S
 		var update *acp.SessionUpdate
 		switch ev := e.(type) {
 		case kit.MessageUpdateEvent:
-			// Handle models that wrap reasoning in <thinking> tags (Qwen, DeepSeek)
-			// Parse the chunk and separate reasoning from regular text
-			reasoning, text := a.parseThinkingTags(ev.Chunk)
-
-			// Send reasoning update if we have reasoning content
-			if reasoning != "" {
-				u := acp.UpdateAgentThoughtText(reasoning)
-				_ = a.conn.SessionUpdate(ctx, acp.SessionNotification{
-					SessionId: sessionID,
-					Update:    u,
-				})
-			}
-
-			// Send text update if we have text content
-			if text != "" {
-				u := acp.UpdateAgentMessageText(text)
-				update = &u
-			}
+			u := acp.UpdateAgentMessageText(ev.Chunk)
+			update = &u

 		case kit.ReasoningDeltaEvent:
 			u := acp.UpdateAgentThoughtText(ev.Delta)
@@ -430,81 +454,6 @@ func extractPromptContent(blocks []acp.ContentBlock) (string, []kit.LLMFilePart)
 	return strings.Join(textParts, "\n"), files
 }

-// parseThinkingTags parses a text chunk for <thinking> or <think> tags and separates
-// reasoning content from regular text. This handles models (Qwen, DeepSeek)
-// that wrap reasoning in XML-style tags instead of using proper reasoning events.
-// Returns (reasoningContent, textContent).
-func (a *Agent) parseThinkingTags(chunk string) (reasoning string, text string) {
-	// Handle empty chunk
-	if chunk == "" {
-		return "", ""
-	}
-
-	// Determine which tag format to use (long or short)
-	openTag := thinkingTagOpen
-	closeTag := thinkingTagClose
-
-	if strings.Contains(chunk, shortThinkTagOpen) || strings.Contains(chunk, shortThinkTagClose) {
-		openTag = shortThinkTagOpen
-		closeTag = shortThinkTagClose
-	} else if !strings.Contains(chunk, thinkingTagOpen) && !strings.Contains(chunk, thinkingTagClose) && !a.inThinkingTag {
-		// No tags at all and not in thinking mode - return as text
-		return "", chunk
-	}
-
-	// Check for opening tag
-	if strings.Contains(chunk, openTag) {
-		parts := strings.SplitN(chunk, openTag, 2)
-
-		// Content before the opening tag is regular text
-		if !a.inThinkingTag && parts[0] != "" {
-			text = parts[0]
-		}
-
-		a.inThinkingTag = true
-
-		// Content after the opening tag is reasoning
-		if len(parts) > 1 {
-			// Check if the same chunk contains the closing tag
-			if strings.Contains(parts[1], closeTag) {
-				innerParts := strings.SplitN(parts[1], closeTag, 2)
-				reasoning = innerParts[0]
-				a.inThinkingTag = false
-
-				// Content after closing tag is regular text
-				if len(innerParts) > 1 && innerParts[1] != "" {
-					text += innerParts[1]
-				}
-			} else if parts[1] != "" {
-				// No closing tag yet, all remaining content is reasoning
-				reasoning = parts[1]
-			}
-		}
-		return reasoning, text
-	}
-
-	// Check for closing tag
-	if strings.Contains(chunk, closeTag) {
-		parts := strings.SplitN(chunk, closeTag, 2)
-		a.inThinkingTag = false
-
-		// Content before closing tag is reasoning
-		reasoning = parts[0]
-
-		// Content after closing tag is regular text
-		if len(parts) > 1 && parts[1] != "" {
-			text = parts[1]
-		}
-		return reasoning, text
-	}
-
-	// No tags found - content goes to current mode
-	if a.inThinkingTag {
-		return chunk, ""
-	}
-	return "", chunk
-}
-
 // isTextMimeType returns true if the MIME type indicates text content.
 func isTextMimeType(mimeType string) bool {
 	return strings.HasPrefix(mimeType, "text/") ||
@@ -7,7 +7,9 @@ import (
 	"sync"

 	"github.com/charmbracelet/log"
+	"github.com/spf13/viper"

+	"github.com/mark3labs/kit/internal/extbridge"
 	"github.com/mark3labs/kit/internal/extensions"
 	kit "github.com/mark3labs/kit/pkg/kit"
 )
@@ -37,10 +39,21 @@ func newSessionRegistry() *sessionRegistry {
 // given working directory. The Kit-generated session ID is used as the ACP
 // session ID so the mapping is 1:1.
 func (r *sessionRegistry) create(ctx context.Context, cwd string) (*acpSession, error) {
+	// Each ACP session gets its own isolated config store (CLI is left nil) so
+	// per-session SetModel / SetThinkingLevel calls cannot race with, or bleed
+	// into, other sessions in the registry. We seed the model, thinking-level,
+	// provider-url and provider-api-key values from the process-global viper
+	// store (which cobra populated from flags) so `kit acp -m <model> ...` is
+	// still honored; .kit.yml and KIT_* env vars are loaded per session by kit.New.
+	streamOn := true
 	kitInstance, err := kit.New(ctx, &kit.Options{
-		SessionDir: cwd,
-		Quiet:      true,
-		Streaming:  true,
+		SessionDir:     cwd,
+		Quiet:          true,
+		Streaming:      &streamOn,
+		Model:          viper.GetString("model"),
+		ThinkingLevel:  viper.GetString("thinking-level"),
+		ProviderURL:    viper.GetString("provider-url"),
+		ProviderAPIKey: viper.GetString("provider-api-key"),
 	})
 	if err != nil {
 		// Provide actionable guidance for provider auth errors, which are
@@ -60,142 +73,73 @@ func (r *sessionRegistry) create(ctx context.Context, cwd string) (*acpSession,

 	// Wire extension context with headless implementations so extensions
 	// work in ACP mode. TUI-dependent features (widgets, prompts, editor)
-	// become no-ops or return cancelled; all data/model/tool APIs work
-	// identically to interactive mode.
+	// become no-ops or return cancelled; all data/model/tool APIs come from
+	// extbridge.BaseContext and work identically to interactive mode.
 	if kitInstance.Extensions().HasExtensions() {
-		kitInstance.Extensions().SetContext(extensions.Context{
-			SessionID:   sessionID,
-			CWD:         cwd,
-			Model:       kitInstance.GetModelString(),
-			Interactive: false,
+		// Use a background context for subagent spawns: the create() ctx is
+		// request-scoped and may be cancelled before extensions spawn anything.
+		ec := extbridge.BaseContext(context.Background(), kitInstance)

-			// Output — route through structured logger.
-			Print:      func(text string) { log.Debug("extension: print", "text", text) },
-			PrintInfo:  func(text string) { log.Info("extension: info", "text", text) },
-			PrintError: func(text string) { log.Error("extension: error", "text", text) },
-			PrintBlock: func(opts extensions.PrintBlockOpts) {
-				log.Info("extension: block", "subtitle", opts.Subtitle, "text", opts.Text)
-			},
+		ec.SessionID = sessionID
+		ec.CWD = cwd
+		ec.Model = kitInstance.GetModelString()
+		ec.Interactive = false

-			// Message injection — no-ops for now; ACP clients drive prompts.
-			SendMessage:   func(string) {},
-			CancelAndSend: func(string) {},
-			Exit:          func() {},
+		// Output — route through structured logger.
+		ec.Print = func(text string) { log.Debug("extension: print", "text", text) }
+		ec.PrintInfo = func(text string) { log.Info("extension: info", "text", text) }
+		ec.PrintError = func(text string) { log.Error("extension: error", "text", text) }
+		ec.PrintBlock = func(opts extensions.PrintBlockOpts) {
+			log.Info("extension: block", "subtitle", opts.Subtitle, "text", opts.Text)
+		}

-			// TUI widgets/chrome — silent no-ops (no TUI in ACP).
-			SetWidget:       func(extensions.WidgetConfig) {},
-			RemoveWidget:    func(string) {},
-			SetHeader:       func(extensions.HeaderFooterConfig) {},
-			RemoveHeader:    func() {},
-			SetFooter:       func(extensions.HeaderFooterConfig) {},
-			RemoveFooter:    func() {},
-			SetEditor:       func(extensions.EditorConfig) {},
-			ResetEditor:     func() {},
-			SetEditorText:   func(string) {},
-			SetUIVisibility: func(extensions.UIVisibility) {},
-			SetStatus:       func(string, string, int) {},
-			RemoveStatus:    func(string) {},
+		// Message injection — no-ops for now; ACP clients drive prompts.
+		ec.SendMessage = func(string) {}
+		ec.CancelAndSend = func(string) {}
+		ec.NewSession = func(string) error {
+			return fmt.Errorf("new session not available in ACP mode")
+		}
+		ec.Exit = func() {}

-			// Interactive prompts — return cancelled (no user to prompt).
-			PromptSelect: func(extensions.PromptSelectConfig) extensions.PromptSelectResult {
-				return extensions.PromptSelectResult{Cancelled: true}
-			},
-			PromptConfirm: func(extensions.PromptConfirmConfig) extensions.PromptConfirmResult {
-				return extensions.PromptConfirmResult{Cancelled: true}
-			},
-			PromptInput: func(extensions.PromptInputConfig) extensions.PromptInputResult {
-				return extensions.PromptInputResult{Cancelled: true}
-			},
-			ShowOverlay: func(extensions.OverlayConfig) extensions.OverlayResult {
-				return extensions.OverlayResult{Cancelled: true, Index: -1}
-			},
-			SuspendTUI: func(callback func()) error { callback(); return nil },
+		// TUI widgets/chrome — silent no-ops (no TUI in ACP).
+		ec.SetWidget = func(extensions.WidgetConfig) {}
+		ec.RemoveWidget = func(string) {}
+		ec.SetHeader = func(extensions.HeaderFooterConfig) {}
+		ec.RemoveHeader = func() {}
+		ec.SetFooter = func(extensions.HeaderFooterConfig) {}
+		ec.RemoveFooter = func() {}
+		ec.SetEditor = func(extensions.EditorConfig) {}
+		ec.ResetEditor = func() {}
+		ec.SetEditorText = func(string) {}
+		ec.SetUIVisibility = func(extensions.UIVisibility) {}
+		ec.SetStatus = func(string, string, int) {}
+		ec.RemoveStatus = func(string) {}

-			// Data access — delegate to Kit instance.
-			GetContextStats: func() extensions.ContextStats {
-				s := kitInstance.GetContextStats()
-				return extensions.ContextStats{
-					EstimatedTokens: s.EstimatedTokens,
-					ContextLimit:    s.ContextLimit,
-					UsagePercent:    s.UsagePercent,
-					MessageCount:    s.MessageCount,
-				}
-			},
-			GetMessages:    func() []extensions.SessionMessage { return kitInstance.Extensions().GetSessionMessages() },
-			GetSessionPath: func() string { return kitInstance.GetSessionPath() },
-			AppendEntry: func(entryType, data string) (string, error) {
-				return kitInstance.Extensions().AppendEntry(entryType, data)
-			},
-			GetEntries: func(entryType string) []extensions.ExtensionEntry {
-				return kitInstance.Extensions().GetEntries(entryType)
-			},
+		// Interactive prompts — return cancelled (no user to prompt).
+		ec.PromptSelect = func(extensions.PromptSelectConfig) extensions.PromptSelectResult {
+			return extensions.PromptSelectResult{Cancelled: true}
+		}
+		ec.PromptConfirm = func(extensions.PromptConfirmConfig) extensions.PromptConfirmResult {
+			return extensions.PromptConfirmResult{Cancelled: true}
+		}
+		ec.PromptInput = func(extensions.PromptInputConfig) extensions.PromptInputResult {
+			return extensions.PromptInputResult{Cancelled: true}
+		}
+		ec.ShowOverlay = func(extensions.OverlayConfig) extensions.OverlayResult {
+			return extensions.OverlayResult{Cancelled: true, Index: -1}
+		}
+		ec.SuspendTUI = func(callback func()) error { callback(); return nil }

-			// Options, model, and tool management.
-			GetOption: func(name string) string { return kitInstance.Extensions().GetOption(name) },
-			SetOption: func(name, value string) { kitInstance.Extensions().SetOption(name, value) },
-			SetModel: func(modelString string) error {
-				previousModel := kitInstance.Extensions().GetContext().Model
-				if err := kitInstance.SetModel(context.Background(), modelString); err != nil {
-					return err
-				}
-				kitInstance.Extensions().UpdateContextModel(modelString)
-				kitInstance.Extensions().EmitModelChange(modelString, previousModel, "extension")
-				return nil
-			},
-			GetAvailableModels: func() []extensions.ModelInfoEntry { return kitInstance.GetAvailableModels() },
-			EmitCustomEvent:    func(name, data string) { kitInstance.Extensions().EmitCustomEvent(name, data) },
-			GetAllTools:        func() []extensions.ToolInfo { return kitInstance.Extensions().GetToolInfos() },
-			SetActiveTools:     func(names []string) { kitInstance.Extensions().SetActiveTools(names) },
+		// Render — fall back to logging.
+		ec.RenderMessage = func(name, content string) {
+			renderer := kitInstance.Extensions().GetMessageRenderer(name)
+			if renderer != nil && renderer.Render != nil {
+				content = renderer.Render(content, 80)
+			}
+			log.Info("extension: message", "renderer", name, "content", content)
+		}

-			// LLM completions and subagents.
-			Complete: func(req extensions.CompleteRequest) (extensions.CompleteResponse, error) {
-				return kitInstance.ExecuteCompletion(context.Background(), req)
-			},
-			SpawnSubagent: func(config extensions.SubagentConfig) (*extensions.SubagentHandle, *extensions.SubagentResult, error) {
-				sdkCfg := kit.SubagentConfig{
-					Prompt:       config.Prompt,
-					Model:        config.Model,
-					SystemPrompt: config.SystemPrompt,
-					Timeout:      config.Timeout,
-					NoSession:    config.NoSession,
-				}
-				if config.OnEvent != nil {
-					sdkCfg.OnEvent = func(e kit.Event) {
-						se := sdkEventToSubagentEvent(e)
-						if se.Type != "" {
-							config.OnEvent(se)
-						}
-					}
-				}
-				result, err := kitInstance.Subagent(context.Background(), sdkCfg)
-				if result == nil {
-					return nil, &extensions.SubagentResult{Error: err}, err
-				}
-				extResult := &extensions.SubagentResult{
-					Response:  result.Response,
-					Error:     err,
-					SessionID: result.SessionID,
-					Elapsed:   result.Elapsed,
-				}
-				if result.Usage != nil {
-					extResult.Usage = &extensions.SubagentUsage{
-						InputTokens:  result.Usage.InputTokens,
-						OutputTokens: result.Usage.OutputTokens,
-					}
-				}
-				return nil, extResult, err
-			},
-
-			// Render — fall back to logging.
-			RenderMessage: func(name, content string) {
-				renderer := kitInstance.Extensions().GetMessageRenderer(name)
-				if renderer != nil && renderer.Render != nil {
-					content = renderer.Render(content, 80)
-				}
-				log.Info("extension: message", "renderer", name, "content", content)
-			},
-			ReloadExtensions: func() error { return kitInstance.Extensions().Reload() },
-		})
+		kitInstance.Extensions().SetContext(ec)
 		kitInstance.Extensions().EmitSessionStart()
 	}

@@ -232,6 +176,20 @@ func (r *sessionRegistry) closeAll() {
 	}
 }

+// remove closes the session's Kit (if any) and deletes the session from the registry; unknown IDs are a no-op.
+func (r *sessionRegistry) remove(sessionID string) {
+	r.mu.Lock() // held for the whole call, including kit.Close below
+	defer r.mu.Unlock()
+	sess, ok := r.sessions[sessionID]
+	if !ok {
+		return
+	}
+	if sess.kit != nil {
+		_ = sess.kit.Close() // close error is explicitly discarded; the entry is deleted regardless
+	}
+	delete(r.sessions, sessionID)
+}
+
 // cancelPrompt cancels the current prompt for a session, if any.
 func (s *acpSession) cancelPrompt() {
 	s.cancelMu.Lock()
@@ -255,40 +213,3 @@ func (s *acpSession) clearCancel() {
 	defer s.cancelMu.Unlock()
 	s.cancelFn = nil
 }
-
-// sdkEventToSubagentEvent converts an SDK event to an extension SubagentEvent.
-func sdkEventToSubagentEvent(e kit.Event) extensions.SubagentEvent {
-	switch ev := e.(type) {
-	case kit.MessageUpdateEvent:
-		return extensions.SubagentEvent{Type: "text", Content: ev.Chunk}
-	case kit.ReasoningDeltaEvent:
-		return extensions.SubagentEvent{Type: "reasoning", Content: ev.Delta}
-	case kit.ToolCallEvent:
-		return extensions.SubagentEvent{
-			Type: "tool_call", ToolCallID: ev.ToolCallID,
-			ToolName: ev.ToolName, ToolKind: ev.ToolKind, ToolArgs: ev.ToolArgs,
-		}
-	case kit.ToolExecutionStartEvent:
-		return extensions.SubagentEvent{
-			Type: "tool_execution_start", ToolCallID: ev.ToolCallID,
-			ToolName: ev.ToolName, ToolKind: ev.ToolKind,
-		}
-	case kit.ToolExecutionEndEvent:
-		return extensions.SubagentEvent{
-			Type: "tool_execution_end", ToolCallID: ev.ToolCallID,
-			ToolName: ev.ToolName, ToolKind: ev.ToolKind,
-		}
-	case kit.ToolResultEvent:
-		return extensions.SubagentEvent{
-			Type: "tool_result", ToolCallID: ev.ToolCallID,
-			ToolName: ev.ToolName, ToolKind: ev.ToolKind,
-			ToolResult: ev.Result, IsError: ev.IsError,
-		}
-	case kit.TurnStartEvent:
-		return extensions.SubagentEvent{Type: "turn_start"}
-	case kit.TurnEndEvent:
-		return extensions.SubagentEvent{Type: "turn_end"}
-	default:
-		return extensions.SubagentEvent{}
-	}
-}
@@ -6,6 +6,8 @@ import (
 	"fmt"
 	"io"
 	"strings"
+	"sync"
+	"time"

 	"charm.land/fantasy"

@@ -30,11 +32,21 @@ type AgentConfig struct {
 	// If nil, remote MCP servers that require OAuth will fail to connect.
 	AuthHandler tools.MCPAuthHandler

+	// TokenStoreFactory, if non-nil, creates a custom token store for each
+	// remote MCP server's OAuth tokens. When nil, the default file-based
+	// token store is used.
+	TokenStoreFactory tools.TokenStoreFactory
+
 	// CoreTools overrides the default core tool set. If empty, core.AllTools()
 	// is used. This allows SDK users to provide a custom tool set (e.g.
 	// CodingTools or tools with a custom WorkDir).
 	CoreTools []fantasy.AgentTool

+	// DisableCoreTools, when true, suppresses the default core.AllTools() set.
+	// It only takes effect when CoreTools is empty (a non-empty CoreTools is
+	// still used); the agent then has no core tools (useful for chat-only use).
+	DisableCoreTools bool
+
 	// ToolWrapper is an optional function that wraps the combined tool list
 	// before it is passed to the LLM agent. Used by the extensions system
 	// to intercept tool calls/results.
@@ -43,6 +55,16 @@ type AgentConfig struct {
 	// ExtraTools are additional tools to include alongside core and MCP tools.
 	// Used by extensions to register custom tools.
 	ExtraTools []fantasy.AgentTool
+
+	// OnMCPServerLoaded, if non-nil, is called when each MCP server finishes
+	// loading (successfully or with error). The callback receives the server
+	// name, tool count, and any error. Called from the background goroutine.
+	OnMCPServerLoaded func(serverName string, toolCount int, err error)
+
+	// MCPTaskConfig configures task-augmented tools/call execution. The
+	// zero value preserves historical synchronous-only behaviour for any
+	// server that didn't advertise task support during initialize.
+	MCPTaskConfig tools.MCPTaskConfig
 }

 // ToolCallHandler is a function type for handling tool calls as they happen.
@@ -72,6 +94,19 @@ type ReasoningDeltaHandler func(delta string)
 // Called when the last reasoning token has been processed, before text streaming starts.
 type ReasoningCompleteHandler func()

+// ToolCallStartHandler is a function type for handling the moment when the LLM
+// begins generating tool call arguments. The tool name is known but the full
+// argument JSON is still streaming.
+type ToolCallStartHandler func(toolCallID, toolName string)
+
+// ToolCallDeltaHandler is a function type for handling streamed fragments of
+// tool call arguments as they arrive from the LLM.
+type ToolCallDeltaHandler func(toolCallID, delta string)
+
+// ToolCallEndHandler is a function type for handling the end of tool argument
+// streaming, before the tool call is parsed and execution begins.
+type ToolCallEndHandler func(toolCallID string)
+
 // ToolOutputHandler is a function type for handling streaming tool output chunks.
 // Used by tools like bash to stream output as it arrives rather than waiting
 // for the command to complete. The isStderr flag indicates if the chunk
@@ -79,11 +114,95 @@ type ReasoningCompleteHandler func()
 // Note: This is an alias for core.ToolOutputCallback to avoid import cycles.
 type ToolOutputHandler = core.ToolOutputCallback

+// PasswordPromptHandler is a function type for password prompts.
+// Used by the bash tool when sudo requires a password. The handler receives
+// a prompt message and returns the password and whether it was cancelled.
+// Note: This is an alias for core.PasswordPromptCallback.
+type PasswordPromptHandler = core.PasswordPromptCallback
+
+// StepMessagesHandler is a function type for persisting messages after each
+// complete step in a multi-step agent turn. The handler receives the messages
+// produced by the step (typically an assistant message with tool calls followed
+// by a tool-role message with results, or a final assistant message with text).
+// This enables incremental session persistence so that progress is saved as
+// it happens rather than only at the end of the turn.
+type StepMessagesHandler func(stepMessages []fantasy.Message)
+
 // StepUsageHandler is a function type for handling token usage after each
 // complete step in a multi-step agent turn. This enables real-time cost
 // tracking during long-running tool-calling conversations.
 type StepUsageHandler func(inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens int64)

+// StepStartHandler is called when a new LLM step begins within a turn.
+type StepStartHandler func(stepNumber int)
+
+// StepFinishHandler is called when a step completes with full context.
+type StepFinishHandler func(stepNumber int, hasToolCalls bool, finishReason string, usage fantasy.Usage)
+
+// TextStartHandler is called when the LLM begins generating text content.
+type TextStartHandler func(id string)
+
+// TextEndHandler is called when the LLM finishes generating text content.
+type TextEndHandler func(id string)
+
+// ReasoningStartHandler is called when the LLM begins reasoning/thinking.
+type ReasoningStartHandler func(id string)
+
+// WarningsHandler is called when the LLM provider returns warnings.
+type WarningsHandler func(warnings []string)
+
+// SourceHandler is called when the LLM references a source.
+type SourceHandler func(sourceType, id, url, title string)
+
+// StreamFinishHandler is called when a per-step LLM stream completes.
+type StreamFinishHandler func(usage fantasy.Usage, finishReason string)
+
+// ErrorHandler is called when an agent-level error occurs.
+type ErrorHandler func(err error)
+
+// RetryHandler is called when the LLM request is retried.
+type RetryHandler func(attempt int, err error)
+
+// PrepareStepHandler is called between steps to allow message modification.
+// It receives the step number and current messages, and returns replacement
+// messages (or nil to keep unchanged).
+type PrepareStepHandler func(stepNumber int, messages []fantasy.Message) []fantasy.Message
+
+// GenerateCallbacks consolidates all callback functions for
+// GenerateWithCallbacks into a single struct, replacing what was previously
+// 16+ positional callback parameters. New fields default to nil, so adding
+// new callbacks does not break existing callers.
+type GenerateCallbacks struct {
+	OnToolCall          ToolCallHandler
+	OnToolExecution     ToolExecutionHandler
+	OnToolResult        ToolResultHandler
+	OnResponse          ResponseHandler
+	OnToolCallContent   ToolCallContentHandler
+	OnStreamingResponse StreamingResponseHandler
+	OnReasoningDelta    ReasoningDeltaHandler
+	OnReasoningComplete ReasoningCompleteHandler
+	OnToolOutput        ToolOutputHandler // injected into ctx for core tools (e.g. bash)
+	OnStepMessages      StepMessagesHandler
+	OnStepUsage         StepUsageHandler
+	OnPasswordPrompt    PasswordPromptHandler // injected into ctx for the bash tool
+	OnToolCallStart     ToolCallStartHandler
+	OnToolCallDelta     ToolCallDeltaHandler
+	OnToolCallEnd       ToolCallEndHandler
+
+	// New callbacks for previously unwired Fantasy lifecycle events.
+	OnStepStart      StepStartHandler
+	OnStepFinish     StepFinishHandler
+	OnTextStart      TextStartHandler
+	OnTextEnd        TextEndHandler
+	OnReasoningStart ReasoningStartHandler
+	OnWarnings       WarningsHandler
+	OnSource         SourceHandler
+	OnStreamFinish   StreamFinishHandler
+	OnError          ErrorHandler
+	OnRetry          RetryHandler
+	OnPrepareStep    PrepareStepHandler
+}
+
 // Agent represents an AI agent with core tool integration using the LLM library.
 // Core tools (bash, read, write, edit, grep, find, ls) are registered as direct
 // AgentTool implementations — no MCP layer, no serialization overhead.
@@ -112,11 +231,27 @@ type Agent struct {
 	skipMaxOutputTokens bool
 	modelConfig         *models.ProviderConfig

+	// authHandler and tokenStoreFactory are stored from AgentConfig so that
+	// AddMCPServer() can propagate them when creating a new MCPToolManager
+	// at runtime (i.e. when no MCP servers were configured at init time).
+	authHandler       tools.MCPAuthHandler
+	tokenStoreFactory tools.TokenStoreFactory
+
+	// mcpTaskConfig is stored from AgentConfig so AddMCPServer() can
+	// propagate it to a lazily-created MCPToolManager.
+	mcpTaskConfig tools.MCPTaskConfig
+
 	// mcpReady is closed when background MCP tool loading completes (success
 	// or failure). nil when no MCP servers are configured.
 	mcpReady chan struct{}
 	// mcpErr holds any error from background MCP loading.
 	mcpErr error
+
+	// promptMu serializes runtime updates to systemPrompt and the
+	// accompanying fantasy agent rebuild so concurrent SetSystemPrompt
+	// callers (e.g. Kit.applyComposedSystemPrompt invoked from multiple
+	// goroutines) don't race on a.systemPrompt / a.fantasyAgent.
+	promptMu sync.Mutex
 }

 // GenerateWithLoopResult contains the result and conversation history from an agent interaction.
@@ -131,6 +266,11 @@ type GenerateWithLoopResult struct {
 	TotalUsage fantasy.Usage
 	// StopReason is the LLM provider's finish reason for the final response.
 	StopReason string
+	// PersistedMessageCount is the number of new messages (beyond the original
+	// input) that were already persisted incrementally via OnStepMessages during
+	// generation. The caller should skip these when doing post-generation
+	// persistence to avoid duplicates.
+	PersistedMessageCount int
 }

 // NewAgent creates a new Agent with core tools and optional MCP tool integration.
@@ -148,8 +288,16 @@ func NewAgent(ctx context.Context, agentConfig *AgentConfig) (*Agent, error) {

 	// Register core tools (direct AgentTool implementations, no MCP overhead).
 	// Use caller-provided tools if set, otherwise default to all core tools.
-	coreTools := agentConfig.CoreTools
-	if len(coreTools) == 0 {
+	// DisableCoreTools allows explicitly having zero tools (for chat-only mode).
+	var coreTools []fantasy.AgentTool
+	if agentConfig.DisableCoreTools && len(agentConfig.CoreTools) == 0 {
+		// Explicitly zero tools - chat-only mode
+		coreTools = nil
+	} else if len(agentConfig.CoreTools) > 0 {
+		// Custom tools provided - use them
+		coreTools = agentConfig.CoreTools
+	} else {
+		// Default: load all core tools
 		coreTools = core.AllTools()
 	}

@@ -195,19 +343,30 @@ func NewAgent(ctx context.Context, agentConfig *AgentConfig) (*Agent, error) {
 		providerOptions:     providerResult.ProviderOptions,
 		skipMaxOutputTokens: providerResult.SkipMaxOutputTokens,
 		modelConfig:         agentConfig.ModelConfig,
+		authHandler:         agentConfig.AuthHandler,
+		tokenStoreFactory:   agentConfig.TokenStoreFactory,
+		mcpTaskConfig:       agentConfig.MCPTaskConfig,
 	}

 	// Start MCP tool loading in the background if servers are configured.
 	// The mcpReady channel is closed when loading completes (success or failure).
 	if agentConfig.MCPConfig != nil && len(agentConfig.MCPConfig.MCPServers) > 0 {
 		toolManager := tools.NewMCPToolManager()
-		toolManager.SetModel(providerResult.Model)
 		if agentConfig.AuthHandler != nil {
 			toolManager.SetAuthHandler(agentConfig.AuthHandler)
 		}
+		if agentConfig.TokenStoreFactory != nil {
+			toolManager.SetTokenStoreFactory(agentConfig.TokenStoreFactory)
+		}
 		if agentConfig.DebugLogger != nil {
 			toolManager.SetDebugLogger(agentConfig.DebugLogger)
 		}
+		// Set per-server loaded callback if provided.
+		if agentConfig.OnMCPServerLoaded != nil {
+			toolManager.SetOnServerLoaded(agentConfig.OnMCPServerLoaded)
+		}
+		// Apply task-augmented tool execution config (zero value = no-op).
+		toolManager.SetTaskConfig(agentConfig.MCPTaskConfig)
 		a.toolManager = toolManager
 		a.mcpReady = make(chan struct{})

@@ -274,7 +433,7 @@ func (a *Agent) rebuildFantasyAgent() {
 	allTools := make([]fantasy.AgentTool, len(a.coreTools))
 	copy(allTools, a.coreTools)
 	if a.toolManager != nil {
-		allTools = append(allTools, a.toolManager.GetTools()...)
+		allTools = append(allTools, mcpToolsToAgentTools(a.toolManager.GetTools(), a.toolManager)...)
 	}
 	if len(a.extraTools) > 0 {
 		allTools = append(allTools, a.extraTools...)
@@ -354,21 +513,20 @@ func (a *Agent) GenerateWithLoop(ctx context.Context, messages []fantasy.Message
 	onToolCall ToolCallHandler, onToolExecution ToolExecutionHandler, onToolResult ToolResultHandler,
 	onResponse ResponseHandler, onToolCallContent ToolCallContentHandler,
 ) (*GenerateWithLoopResult, error) {
-	return a.GenerateWithLoopAndStreaming(ctx, messages, onToolCall, onToolExecution, onToolResult,
-		onResponse, onToolCallContent, nil, nil, nil, nil, nil)
+	return a.GenerateWithCallbacks(ctx, messages, GenerateCallbacks{
+		OnToolCall:        onToolCall,
+		OnToolExecution:   onToolExecution,
+		OnToolResult:      onToolResult,
+		OnResponse:        onResponse,
+		OnToolCallContent: onToolCallContent,
+	})
 }

-// GenerateWithLoopAndStreaming processes messages using the agent with streaming and callbacks.
+// GenerateWithCallbacks processes messages using the agent with streaming and callbacks.
 // The agent handles the tool call loop internally. We map the rich callback system
 // to kit's existing callback interface for UI integration.
-func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fantasy.Message,
-	onToolCall ToolCallHandler, onToolExecution ToolExecutionHandler, onToolResult ToolResultHandler,
-	onResponse ResponseHandler, onToolCallContent ToolCallContentHandler,
-	onStreamingResponse StreamingResponseHandler,
-	onReasoningDelta ReasoningDeltaHandler,
-	onReasoningComplete ReasoningCompleteHandler,
-	onToolOutput ToolOutputHandler,
-	onStepUsage StepUsageHandler,
+func (a *Agent) GenerateWithCallbacks(ctx context.Context, messages []fantasy.Message,
+	cb GenerateCallbacks,
 ) (*GenerateWithLoopResult, error) {

 	// Wait for background MCP tool loading to complete and rebuild the
@@ -377,8 +535,13 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 	a.ensureMCPTools()

 	// Inject tool output handler into context for use by core tools (e.g., bash).
-	if onToolOutput != nil {
-		ctx = core.ContextWithToolOutputCallback(ctx, onToolOutput)
+	if cb.OnToolOutput != nil {
+		ctx = core.ContextWithToolOutputCallback(ctx, cb.OnToolOutput)
+	}
+
+	// Inject password prompt handler into context for use by bash tool.
+	if cb.OnPasswordPrompt != nil {
+		ctx = core.ContextWithPasswordPrompt(ctx, cb.OnPasswordPrompt)
 	}

 	// The agent requires the current user input as Prompt, with prior messages as history.
@@ -391,15 +554,25 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 	// This avoids type conflicts with provider-level options.
 	history = applyCacheControlToMessages(history)

-	// Track current tool call args for callbacks
-	var currentToolArgs string
+	// Track tool call args per-ToolCallID so parallel tool calls in a single
+	// step don't clobber each other. Without this, OnToolResult callbacks would
+	// all see the args of the last OnToolCall in the step. The mutex guards
+	// against the possibility that the underlying streaming layer dispatches
+	// callbacks from multiple goroutines.
+	toolCallArgs := make(map[string]string)
+	var toolCallArgsMu sync.Mutex

 	// Use the streaming path when streaming is enabled OR when any callbacks are
 	// provided. The agent only exposes tool/step callbacks on AgentStreamCall, so
 	// Stream is required to observe tool execution in real time. The non-streaming
 	// Generate path is reserved for the simple case with no callbacks at all.
-	hasCallbacks := onToolCall != nil || onToolExecution != nil || onToolResult != nil ||
-		onToolCallContent != nil || onStreamingResponse != nil || onReasoningDelta != nil
+	hasCallbacks := cb.OnToolCall != nil || cb.OnToolExecution != nil || cb.OnToolResult != nil ||
+		cb.OnToolCallContent != nil || cb.OnStreamingResponse != nil || cb.OnReasoningDelta != nil ||
+		cb.OnToolCallStart != nil || cb.OnToolCallDelta != nil || cb.OnToolCallEnd != nil ||
+		cb.OnStepStart != nil || cb.OnStepFinish != nil || cb.OnTextStart != nil ||
+		cb.OnTextEnd != nil || cb.OnReasoningStart != nil || cb.OnWarnings != nil ||
+		cb.OnSource != nil || cb.OnStreamFinish != nil || cb.OnError != nil ||
+		cb.OnRetry != nil || cb.OnPrepareStep != nil

 	if a.streamingEnabled || hasCallbacks {
 		// Track completed step messages so we can return partial results
@@ -407,6 +580,12 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 		// when it returns an error, but the OnStepFinish callback fires
 		// for every step that completed before the error occurred.
 		var completedStepMessages []fantasy.Message
+		// persistedCount tracks how many new messages (beyond the original
+		// input) were persisted incrementally via cb.OnStepMessages, so the
+		// caller can skip them during post-generation persistence.
+		var persistedCount int
+		// stepCounter tracks the current step number for StepStart/StepFinish events.
+		var stepCounter int

 		// Use the streaming agent
 		streamCall := fantasy.AgentStreamCall{
@@ -414,13 +593,73 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 			Files:    files,
 			Messages: history,

+			// Tool input streaming callbacks — fire during tool argument generation
+			OnToolInputStart: func(id, toolName string) error {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				if cb.OnToolCallStart != nil {
+					cb.OnToolCallStart(id, toolName)
+				}
+				return nil
+			},
+			OnToolInputDelta: func(id, delta string) error {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				if cb.OnToolCallDelta != nil {
+					cb.OnToolCallDelta(id, delta)
+				}
+				return nil
+			},
+			OnToolInputEnd: func(id string) error {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				if cb.OnToolCallEnd != nil {
+					cb.OnToolCallEnd(id)
+				}
+				return nil
+			},
+
+			// Text start/end callbacks
+			OnTextStart: func(id string) error {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				if cb.OnTextStart != nil {
+					cb.OnTextStart(id)
+				}
+				return nil
+			},
+			OnTextEnd: func(id string) error {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				if cb.OnTextEnd != nil {
+					cb.OnTextEnd(id)
+				}
+				return nil
+			},
+
+			// Reasoning start callback
+			OnReasoningStart: func(id string, _ fantasy.ReasoningContent) error {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				if cb.OnReasoningStart != nil {
+					cb.OnReasoningStart(id)
+				}
+				return nil
+			},
+
 			// Reasoning/thinking streaming callback
 			OnReasoningDelta: func(id, delta string) error {
 				if ctx.Err() != nil {
 					return ctx.Err()
 				}
-				if onReasoningDelta != nil {
-					onReasoningDelta(delta)
+				if cb.OnReasoningDelta != nil {
+					cb.OnReasoningDelta(delta)
 				}
 				return nil
 			},
@@ -430,8 +669,8 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 				if ctx.Err() != nil {
 					return ctx.Err()
 				}
-				if onReasoningComplete != nil {
-					onReasoningComplete()
+				if cb.OnReasoningComplete != nil {
+					cb.OnReasoningComplete()
 				}
 				return nil
 			},
@@ -441,8 +680,64 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 				if ctx.Err() != nil {
 					return ctx.Err()
 				}
-				if onStreamingResponse != nil {
-					onStreamingResponse(text)
+				if cb.OnStreamingResponse != nil {
+					cb.OnStreamingResponse(text)
+				}
+				return nil
+			},
+
+			// Warnings callback
+			OnWarnings: func(warnings []fantasy.CallWarning) error {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				if cb.OnWarnings != nil {
+					strs := make([]string, len(warnings))
+					for i, w := range warnings {
+						strs[i] = w.Message
+					}
+					cb.OnWarnings(strs)
+				}
+				return nil
+			},
+
+			// Source callback
+			OnSource: func(source fantasy.SourceContent) error {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				if cb.OnSource != nil {
+					cb.OnSource(string(source.SourceType), source.ID, source.URL, source.Title)
+				}
+				return nil
+			},
+
+			// Stream finish callback (per-step stream completion)
+			OnStreamFinish: func(usage fantasy.Usage, finishReason fantasy.FinishReason, _ fantasy.ProviderMetadata) error {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				if cb.OnStreamFinish != nil {
+					cb.OnStreamFinish(usage, string(finishReason))
+				}
+				return nil
+			},
+
+			// Error callback
+			OnError: func(err error) {
+				if cb.OnError != nil {
+					cb.OnError(err)
+				}
+			},
+
+			// Step start callback
+			OnStepStart: func(stepNumber int) error {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				stepCounter = stepNumber
+				if cb.OnStepStart != nil {
+					cb.OnStepStart(stepNumber)
 				}
 				return nil
 			},
@@ -452,16 +747,18 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 				if ctx.Err() != nil {
 					return ctx.Err()
 				}
-				currentToolArgs = tc.Input
+				toolCallArgsMu.Lock()
+				toolCallArgs[tc.ToolCallID] = tc.Input
+				toolCallArgsMu.Unlock()

 				// Notify about the tool call
-				if onToolCall != nil {
-					onToolCall(tc.ToolCallID, tc.ToolName, tc.Input)
+				if cb.OnToolCall != nil {
+					cb.OnToolCall(tc.ToolCallID, tc.ToolName, tc.Input)
 				}

 				// Notify tool execution starting
-				if onToolExecution != nil {
-					onToolExecution(tc.ToolCallID, tc.ToolName, tc.Input, true)
+				if cb.OnToolExecution != nil {
+					cb.OnToolExecution(tc.ToolCallID, tc.ToolName, tc.Input, true)
 				}

 				return nil
@@ -472,15 +769,22 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 				if ctx.Err() != nil {
 					return ctx.Err()
 				}
+				// Look up the args recorded for this specific tool call. Delete
+				// the entry so the map doesn't accumulate across steps.
+				toolCallArgsMu.Lock()
+				args := toolCallArgs[tr.ToolCallID]
+				delete(toolCallArgs, tr.ToolCallID)
+				toolCallArgsMu.Unlock()
+
 				// Notify tool execution finished
-				if onToolExecution != nil {
-					onToolExecution(tr.ToolCallID, tr.ToolName, currentToolArgs, false)
+				if cb.OnToolExecution != nil {
+					cb.OnToolExecution(tr.ToolCallID, tr.ToolName, args, false)
 				}

-				if onToolResult != nil {
+				if cb.OnToolResult != nil {
 					// Extract result text and error status
 					resultText, isError := extractToolResultText(tr)
-					onToolResult(tr.ToolCallID, tr.ToolName, currentToolArgs, resultText, tr.ClientMetadata, isError)
+					cb.OnToolResult(tr.ToolCallID, tr.ToolName, args, resultText, tr.ClientMetadata, isError)
 				}

 				return nil
@@ -492,71 +796,101 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 				// persisted even if a later step is cancelled.
 				completedStepMessages = append(completedStepMessages, step.Messages...)

+				// Persist step messages incrementally so progress is saved
+				// as it happens rather than only at the end of the turn.
+				if cb.OnStepMessages != nil && len(step.Messages) > 0 {
+					cb.OnStepMessages(step.Messages)
+					persistedCount += len(step.Messages)
+				}
+
 				if ctx.Err() != nil {
 					return ctx.Err()
 				}
 				// Check if step has text content alongside tool calls
 				text := step.Content.Text()
 				toolCalls := step.Content.ToolCalls()
-				if text != "" && len(toolCalls) > 0 && onToolCallContent != nil {
-					onToolCallContent(text)
+				if text != "" && len(toolCalls) > 0 && cb.OnToolCallContent != nil {
+					cb.OnToolCallContent(text)
 				}
 				// Emit step usage for real-time cost tracking
-				if onStepUsage != nil {
-					onStepUsage(step.Usage.InputTokens, step.Usage.OutputTokens,
+				if cb.OnStepUsage != nil {
+					cb.OnStepUsage(step.Usage.InputTokens, step.Usage.OutputTokens,
 						step.Usage.CacheReadTokens, step.Usage.CacheCreationTokens)
 				}
+				// Emit unified step finish event
+				if cb.OnStepFinish != nil {
+					cb.OnStepFinish(stepCounter, len(toolCalls) > 0, string(step.FinishReason), step.Usage)
+				}
 				return nil
 			},
 		}

-		// If a steer channel is attached to the context, wire up a
-		// PrepareStep function that drains the channel between steps
-		// and injects pending steer messages as user messages before
-		// the next LLM call. This enables graceful mid-turn steering
-		// without cancelling in-progress tool execution.
-		if steerCh := steerChFromContext(ctx); steerCh != nil {
-			onConsumed := steerConsumedFromContext(ctx)
+		// Always wire up PrepareStep to handle both steering and the
+		// OnPrepareStep hook. Steering drains its channel first, then
+		// OnPrepareStep hooks run against the (possibly already steered)
+		// messages.
+		steerCh := steerChFromContext(ctx)
+		onConsumed := steerConsumedFromContext(ctx)
+		hasSteering := steerCh != nil
+		hasPrepareStepHook := cb.OnPrepareStep != nil
+
+		if hasSteering || hasPrepareStepHook {
 			streamCall.PrepareStep = func(
 				stepCtx context.Context,
 				opts fantasy.PrepareStepFunctionOptions,
 			) (context.Context, fantasy.PrepareStepResult, error) {
-				// Drain all pending steer messages (non-blocking).
-				var steered []SteerMessage
-				for {
-					select {
-					case msg := <-steerCh:
-						steered = append(steered, msg)
-					default:
-						goto done
-					}
-				}
-			done:
 				result := fantasy.PrepareStepResult{
 					Model:    opts.Model,
 					Messages: opts.Messages,
 				}
-				if len(steered) > 0 {
-					// Inject each steer message as a user message so the
-					// LLM sees the redirection on the next step.
-					for _, sm := range steered {
-						result.Messages = append(result.Messages,
-							fantasy.NewUserMessage(sm.Text, sm.Files...))
+
+				// Phase 1: Drain steering channel (if present).
+				if hasSteering {
+					var steered []SteerMessage
+					for {
+						select {
+						case msg := <-steerCh:
+							steered = append(steered, msg)
+						default:
+							goto done
+						}
 					}
-					// Notify that steer messages were consumed.
-					if onConsumed != nil {
-						onConsumed(len(steered))
+				done:
+					if len(steered) > 0 {
+						for _, sm := range steered {
+							result.Messages = append(result.Messages,
+								fantasy.NewUserMessage(sm.Text, sm.Files...))
+						}
+						if onConsumed != nil {
+							onConsumed(len(steered))
+						}
+					}
+				}
+
+				// Phase 2: Run OnPrepareStep hook (if registered).
+				if hasPrepareStepHook {
+					if replacement := cb.OnPrepareStep(opts.StepNumber, result.Messages); replacement != nil {
+						result.Messages = replacement
 					}
 				}

 				// Apply message-level cache control for Anthropic models.
-				// This avoids type conflicts with provider-level options.
 				result.Messages = applyCacheControlToMessages(result.Messages)

 				return stepCtx, result, nil
 			}
 		}

+		// Wire OnRetry callback if provided.
+		if cb.OnRetry != nil {
+			streamCall.OnRetry = func(err *fantasy.ProviderError, _ time.Duration) {
+				// Fantasy doesn't pass a retry counter to this callback and
+				// none is tracked here, so the attempt number is always
+				// reported as 0.
+				cb.OnRetry(0, err)
+			}
+		}
+
 		result, err := a.fantasyAgent.Stream(ctx, streamCall)
 		if err != nil {
 			// On cancellation (or any error), return a partial result
@@ -570,7 +904,8 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 				partialMessages = append(partialMessages, messages...)
 				partialMessages = append(partialMessages, completedStepMessages...)
 				return &GenerateWithLoopResult{
-					ConversationMessages: partialMessages,
+					ConversationMessages:  partialMessages,
+					PersistedMessageCount: persistedCount,
 				}, err
 			}
 			return nil, err
@@ -581,11 +916,13 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 		// empty (e.g. reasoning-only responses) so the UI properly resets
 		// the stream component and avoids duplicate content on the next
 		// flush.
-		if onResponse != nil {
-			onResponse(result.Response.Content.Text())
+		if cb.OnResponse != nil {
+			cb.OnResponse(result.Response.Content.Text())
 		}

-		return convertAgentResult(result, messages), nil
+		r := convertAgentResult(result, messages)
+		r.PersistedMessageCount = persistedCount
+		return r, nil
 	}

 	// Non-streaming path with no callbacks — use the simpler Generate call.
@@ -600,8 +937,8 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan

 	// For non-streaming, fire the response callback so callers can reset
 	// streaming state (see streaming path comment above).
-	if onResponse != nil {
-		onResponse(result.Response.Content.Text())
+	if cb.OnResponse != nil {
+		cb.OnResponse(result.Response.Content.Text())
 	}

 	return convertAgentResult(result, messages), nil
@@ -742,7 +1079,7 @@ func (a *Agent) GetTools() []fantasy.AgentTool {
 	allTools := make([]fantasy.AgentTool, len(a.coreTools))
 	copy(allTools, a.coreTools)
 	if a.toolManager != nil {
-		allTools = append(allTools, a.toolManager.GetTools()...)
+		allTools = append(allTools, mcpToolsToAgentTools(a.toolManager.GetTools(), a.toolManager)...)
 	}
 	if len(a.extraTools) > 0 {
 		allTools = append(allTools, a.extraTools...)
@@ -768,6 +1105,18 @@ func (a *Agent) GetExtensionToolCount() int {
 	return len(a.extraTools)
 }

+// GetExtraTools reports the extra tools (e.g. extension-registered tools)
+// currently attached to the agent. The result is a fresh copy — or nil when
+// there are none — so a caller can snapshot the list and restore it later
+// through SetExtraTools without aliasing the agent's own slice.
+func (a *Agent) GetExtraTools() []fantasy.AgentTool {
+	n := len(a.extraTools)
+	if n == 0 {
+		return nil
+	}
+	snapshot := make([]fantasy.AgentTool, n)
+	copy(snapshot, a.extraTools)
+	return snapshot
+}
+
 // SetExtraTools replaces the agent's extra tools (e.g. extension-registered
 // tools) and rebuilds the internal agent with the updated tool list. The
 // model, system prompt, and all other configuration are preserved.
@@ -776,6 +1125,65 @@ func (a *Agent) SetExtraTools(extraTools []fantasy.AgentTool) {
 	a.rebuildFantasyAgent()
 }

+// AddMCPServer connects to a new MCP server at runtime and makes its tools
+// available to the agent. If the agent has no tool manager yet (no MCP
+// servers were configured at init), one is created automatically and seeded
+// with the agent's auth handler, token store factory, and task config.
+//
+// It returns the tool count reported by the tool manager's AddServer. When
+// AddServer fails it returns 0 and that error, and this method does not
+// perform its own rebuild of the fantasy agent.
+func (a *Agent) AddMCPServer(ctx context.Context, name string, cfg config.MCPServerConfig) (int, error) {
+	// Ensure MCP tools from initial load are settled first.
+	a.ensureMCPTools()
+
+	// Lazily create the manager. Nil auth handler / token store factory are
+	// skipped, so the corresponding setters are never called with nil.
+	if a.toolManager == nil {
+		a.toolManager = tools.NewMCPToolManager()
+		if a.authHandler != nil {
+			a.toolManager.SetAuthHandler(a.authHandler)
+		}
+		if a.tokenStoreFactory != nil {
+			a.toolManager.SetTokenStoreFactory(a.tokenStoreFactory)
+		}
+		a.toolManager.SetTaskConfig(a.mcpTaskConfig)
+		// Rebuild the fantasy agent whenever the manager reports that its
+		// tool set changed.
+		a.toolManager.SetOnToolsChanged(func() {
+			a.rebuildFantasyAgent()
+		})
+	}
+
+	count, err := a.toolManager.AddServer(ctx, name, cfg)
+	if err != nil {
+		return 0, err
+	}
+
+	// AddServer's onToolsChanged callback triggers rebuildFantasyAgent,
+	// but only if it was wired (a pre-existing manager may not have it).
+	// Ensure rebuild happens regardless, even if that means rebuilding twice.
+	a.rebuildFantasyAgent()
+	return count, nil
+}
+
+// RemoveMCPServer disconnects an MCP server and removes its tools from the
+// agent. It returns an error when the agent has no tool manager, or whatever
+// error the tool manager's RemoveServer reports; the explicit rebuild of the
+// fantasy agent below only runs after a successful removal.
+//
+// NOTE(review): unlike AddMCPServer, the nil check here runs before
+// ensureMCPTools — confirm ensureMCPTools never assigns a.toolManager.
+func (a *Agent) RemoveMCPServer(name string) error {
+	if a.toolManager == nil {
+		return fmt.Errorf("no MCP servers loaded")
+	}
+
+	// Ensure MCP tools from initial load are settled first.
+	a.ensureMCPTools()
+
+	err := a.toolManager.RemoveServer(name)
+	if err != nil {
+		return err
+	}
+
+	// RemoveServer's onToolsChanged callback triggers rebuildFantasyAgent,
+	// but ensure rebuild happens regardless (the callback may not be wired).
+	a.rebuildFantasyAgent()
+	return nil
+}
+
+// GetMCPToolManager returns the underlying MCP tool manager exactly as stored
+// on the agent (no copy is made).
+// Returns nil if no MCP servers have been configured; AddMCPServer creates a
+// manager on demand in that case.
+func (a *Agent) GetMCPToolManager() *tools.MCPToolManager {
+	return a.toolManager
+}
+
 // GetLoadingMessage returns the loading message from provider creation.
 func (a *Agent) GetLoadingMessage() string {
 	return a.loadingMessage
@@ -789,9 +1197,61 @@ func (a *Agent) GetLoadedServerNames() []string {
 	return a.toolManager.GetLoadedServerNames()
 }

-// SetModel swaps the agent's LLM provider to a new model. The existing tools,
-// system prompt, and configuration are preserved. The old provider is closed
-// if it has a closer. Returns the previous model string for notification.
+// GetMCPPrompts lists the prompts the tool manager has discovered from the
+// connected MCP servers. Without a tool manager the result is nil; otherwise
+// it is whatever the manager's GetPrompts returns.
+func (a *Agent) GetMCPPrompts() []tools.MCPPrompt {
+	if tm := a.toolManager; tm != nil {
+		return tm.GetPrompts()
+	}
+	return nil
+}
+
+// GetMCPPrompt fetches and expands one named prompt from the given MCP
+// server by delegating to the tool manager on every call (nothing is cached
+// at this layer). It fails with an error when the agent has no tool manager.
+func (a *Agent) GetMCPPrompt(ctx context.Context, serverName, promptName string, args map[string]string) (*tools.MCPPromptResult, error) {
+	if tm := a.toolManager; tm != nil {
+		return tm.GetPrompt(ctx, serverName, promptName, args)
+	}
+	return nil, fmt.Errorf("no MCP servers configured")
+}
+
+// GetMCPResources lists the resources the tool manager has discovered from
+// the connected MCP servers, or nil when the agent has no tool manager.
+func (a *Agent) GetMCPResources() []tools.MCPResource {
+	if tm := a.toolManager; tm != nil {
+		return tm.GetResources()
+	}
+	return nil
+}
+
+// ReadMCPResource reads the resource identified by uri from the named MCP
+// server via the tool manager. It fails with an error when the agent has no
+// tool manager.
+func (a *Agent) ReadMCPResource(ctx context.Context, serverName, uri string) (*tools.MCPResourceContent, error) {
+	if tm := a.toolManager; tm != nil {
+		return tm.ReadResource(ctx, serverName, uri)
+	}
+	return nil, fmt.Errorf("no MCP servers configured")
+}
+
+// SubscribeMCPResource asks the tool manager to subscribe to change
+// notifications for the resource at uri on the named server. It fails with
+// an error when the agent has no tool manager.
+func (a *Agent) SubscribeMCPResource(ctx context.Context, serverName, uri string) error {
+	if tm := a.toolManager; tm != nil {
+		return tm.SubscribeResource(ctx, serverName, uri)
+	}
+	return fmt.Errorf("no MCP servers configured")
+}
+
+// UnsubscribeMCPResource asks the tool manager to cancel change
+// notifications for the resource at uri on the named server. It fails with
+// an error when the agent has no tool manager.
+func (a *Agent) UnsubscribeMCPResource(ctx context.Context, serverName, uri string) error {
+	if tm := a.toolManager; tm != nil {
+		return tm.UnsubscribeResource(ctx, serverName, uri)
+	}
+	return fmt.Errorf("no MCP servers configured")
+}
+
+// SetModel swaps the agent's LLM provider to a new model. The existing tools
+// and configuration are preserved. When the new model's ProviderConfig carries
+// a system prompt (from per-model settings), it replaces the agent's stored
+// prompt so the rebuilt fantasy agent uses it. The old provider is closed if
+// it has a closer.
 func (a *Agent) SetModel(ctx context.Context, config *models.ProviderConfig) error {
 	// Ensure MCP tools are loaded before rebuilding (SetModel may be called
 	// before the first LLM call).
@@ -806,11 +1266,6 @@ func (a *Agent) SetModel(ctx context.Context, config *models.ProviderConfig) err
 		_ = a.providerCloser.Close()
 	}

-	// Update model info on MCP tool manager.
-	if a.toolManager != nil {
-		a.toolManager.SetModel(providerResult.Model)
-	}
-
 	// Swap fields.
 	a.model = providerResult.Model
 	a.providerCloser = providerResult.Closer
@@ -818,6 +1273,13 @@ func (a *Agent) SetModel(ctx context.Context, config *models.ProviderConfig) err
 	a.skipMaxOutputTokens = providerResult.SkipMaxOutputTokens
 	a.modelConfig = config

+	// Update system prompt when the config carries one (from per-model
+	// settings or the global config). This allows model-specific system
+	// prompts to take effect on model switch.
+	if config.SystemPrompt != "" {
+		a.systemPrompt = config.SystemPrompt
+	}
+
 	// Update provider type.
 	if config.ModelString != "" {
 		if p, _, err := models.ParseModelString(config.ModelString); err == nil {
@@ -836,6 +1298,40 @@ func (a *Agent) GetModel() fantasy.LanguageModel {
 	return a.model
 }

+// SetSystemPrompt updates the agent's system prompt and rebuilds the underlying
+// fantasy agent so subsequent turns use the new prompt. Safe to call while the
+// agent is idle; if invoked during an in-flight turn the new prompt takes
+// effect on the next LLM call.
+//
+// The write and the rebuild both happen while promptMu is held.
+// NOTE(review): this requires that rebuildFantasyAgent never locks promptMu
+// itself — confirm against its implementation.
+func (a *Agent) SetSystemPrompt(prompt string) {
+	a.promptMu.Lock()
+	defer a.promptMu.Unlock()
+	a.systemPrompt = prompt
+	a.rebuildFantasyAgent()
+}
+
+// GetSystemPrompt reports the system prompt currently stored on the agent.
+// The read is performed under promptMu.
+func (a *Agent) GetSystemPrompt() string {
+	a.promptMu.Lock()
+	current := a.systemPrompt
+	a.promptMu.Unlock()
+	return current
+}
+
+// GetMaxTokens reports the max output tokens recorded on the agent's current
+// ModelConfig. It yields 0 in two situations: the provider suppresses the
+// parameter (skipMaxOutputTokens is set, e.g. Codex OAuth), or no ModelConfig
+// is attached yet (e.g. early init). Callers can therefore treat 0 as
+// "default" rather than "explicitly capped".
+func (a *Agent) GetMaxTokens() int {
+	if a.skipMaxOutputTokens || a.modelConfig == nil {
+		return 0
+	}
+	return a.modelConfig.MaxTokens
+}
+
 // Close closes the agent and cleans up resources.
 // If MCP tools are still loading in the background, Close waits for them
 // to finish before closing connections to avoid resource leaks.
@@ -0,0 +1,302 @@
+package agent
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"testing"
+	"time"
+
+	"charm.land/fantasy"
+
+	"github.com/mark3labs/kit/internal/config"
+)
+
+// mockModel is a stand-in LanguageModel for tests: every method returns a
+// fixed empty/nil value and never performs a real API call. It exists so
+// tool-management wiring can be exercised without a provider.
+type mockModel struct{}
+
+// Generate returns an empty response and no error.
+func (*mockModel) Generate(context.Context, fantasy.Call) (*fantasy.Response, error) {
+	return &fantasy.Response{}, nil
+}
+
+// Stream returns a nil stream and no error.
+func (*mockModel) Stream(context.Context, fantasy.Call) (fantasy.StreamResponse, error) {
+	return nil, nil
+}
+
+// GenerateObject returns an empty object response and no error.
+func (*mockModel) GenerateObject(context.Context, fantasy.ObjectCall) (*fantasy.ObjectResponse, error) {
+	return &fantasy.ObjectResponse{}, nil
+}
+
+// StreamObject returns a nil object stream and no error.
+func (*mockModel) StreamObject(context.Context, fantasy.ObjectCall) (fantasy.ObjectStreamResponse, error) {
+	return nil, nil
+}
+
+// Provider reports the fixed provider name "mock".
+func (*mockModel) Provider() string { return "mock" }
+
+// Model reports the fixed model name "mock-model".
+func (*mockModel) Model() string { return "mock-model" }
+
+// testdataDir resolves the tools testdata directory relative to this test
+// file's own location (../tools/testdata). The test is failed immediately if
+// the caller information is unavailable.
+func testdataDir(t *testing.T) string {
+	t.Helper()
+	_, thisFile, _, ok := runtime.Caller(0)
+	if !ok {
+		t.Fatal("cannot determine test file path")
+	}
+	pkgDir := filepath.Dir(thisFile)
+	return filepath.Join(pkgDir, "..", "tools", "testdata")
+}
+
+// echoServerConfig builds the MCPServerConfig that launches the test echo MCP
+// server (echo_server.py run through python3). The calling test is skipped
+// when the script cannot be stat'ed.
+func echoServerConfig(t *testing.T) config.MCPServerConfig {
+	t.Helper()
+	scriptPath := filepath.Join(testdataDir(t), "echo_server.py")
+	_, statErr := os.Stat(scriptPath)
+	if statErr != nil {
+		t.Skipf("echo_server.py not found: %v", statErr)
+	}
+	return config.MCPServerConfig{Command: []string{"python3", scriptPath}}
+}
+
+// mockAuthHandler is a bare-bones MCPAuthHandler used to check that an auth
+// handler is propagated, without needing a real OAuth server.
+type mockAuthHandler struct {
+	redirectURI string
+}
+
+// RedirectURI reports the redirect URI the handler was constructed with.
+func (h *mockAuthHandler) RedirectURI() string { return h.redirectURI }
+
+// HandleAuth ignores its arguments and returns an empty string with no error.
+func (*mockAuthHandler) HandleAuth(context.Context, string, string) (string, error) {
+	return "", nil
+}
+
+// newTestAgent builds a minimal Agent around a mock model with no core or
+// extra tools, so MCP server management can be tested without an API key.
+func newTestAgent() *Agent {
+	m := &mockModel{}
+	return &Agent{
+		model:        m,
+		maxSteps:     10,
+		systemPrompt: "test",
+		fantasyAgent: fantasy.NewAgent(m),
+	}
+}
+
+// TestAgent_AddMCPServer adds the echo server to a bare agent and checks the
+// reported tool count, the agent's tool list, and the loaded server names.
+func TestAgent_AddMCPServer(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping integration test in short mode")
+	}
+
+	ag := newTestAgent()
+	defer func() { _ = ag.Close() }()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	serverCfg := echoServerConfig(t)
+
+	// A fresh agent starts with no MCP tools.
+	if got := ag.GetMCPToolCount(); got != 0 {
+		t.Fatalf("Expected 0 MCP tools initially, got %d", got)
+	}
+
+	// Register the echo server and check the count it reports.
+	added, err := ag.AddMCPServer(ctx, "echo", serverCfg)
+	if err != nil {
+		t.Fatalf("AddMCPServer failed: %v", err)
+	}
+	if added != 2 {
+		t.Errorf("Expected 2 tools, got %d", added)
+	}
+
+	// The agent-level count and tool list must reflect the new tools.
+	if got := ag.GetMCPToolCount(); got != 2 {
+		t.Errorf("Expected 2 MCP tools, got %d", got)
+	}
+
+	registered := make(map[string]bool)
+	for _, tool := range ag.GetTools() {
+		registered[tool.Info().Name] = true
+	}
+	if !registered["echo__echo"] {
+		t.Error("Expected tool 'echo__echo' in agent tools")
+	}
+	if !registered["echo__greet"] {
+		t.Error("Expected tool 'echo__greet' in agent tools")
+	}
+
+	// The server must show up among the loaded server names.
+	loaded := ag.GetLoadedServerNames()
+	var sawEcho bool
+	for _, serverName := range loaded {
+		if serverName == "echo" {
+			sawEcho = true
+			break
+		}
+	}
+	if !sawEcho {
+		t.Errorf("Expected 'echo' in loaded server names: %v", loaded)
+	}
+}
+
+// TestAgent_RemoveMCPServer adds the echo server, removes it again, and
+// checks that none of its tools remain on the agent.
+func TestAgent_RemoveMCPServer(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping integration test in short mode")
+	}
+
+	ag := newTestAgent()
+	defer func() { _ = ag.Close() }()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	serverCfg := echoServerConfig(t)
+
+	// Add, then immediately remove.
+	if _, err := ag.AddMCPServer(ctx, "echo", serverCfg); err != nil {
+		t.Fatalf("AddMCPServer failed: %v", err)
+	}
+	if err := ag.RemoveMCPServer("echo"); err != nil {
+		t.Fatalf("RemoveMCPServer failed: %v", err)
+	}
+
+	// The MCP tool count drops back to zero.
+	if got := ag.GetMCPToolCount(); got != 0 {
+		t.Errorf("Expected 0 MCP tools after removal, got %d", got)
+	}
+
+	// No echo-prefixed tool may linger in the agent's tool list.
+	for _, tool := range ag.GetTools() {
+		toolName := tool.Info().Name
+		if strings.Contains(toolName, "echo__") {
+			t.Errorf("Found leftover tool after removal: %s", toolName)
+		}
+	}
+}
+
+// TestAgent_RemoveMCPServer_NoToolManager checks that removing a server from
+// an agent without a tool manager fails with the "no MCP servers loaded"
+// error.
+func TestAgent_RemoveMCPServer_NoToolManager(t *testing.T) {
+	ag := newTestAgent()
+	defer func() { _ = ag.Close() }()
+
+	err := ag.RemoveMCPServer("nonexistent")
+	switch {
+	case err == nil:
+		t.Fatal("Expected error when no tool manager exists")
+	case !strings.Contains(err.Error(), "no MCP servers loaded"):
+		t.Errorf("Expected 'no MCP servers loaded' error, got: %v", err)
+	}
+}
+
+// TestAgent_AddMCPServer_CreatesToolManager checks that AddMCPServer creates
+// a tool manager on an agent that started without one.
+func TestAgent_AddMCPServer_CreatesToolManager(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping integration test in short mode")
+	}
+
+	ag := newTestAgent()
+	defer func() { _ = ag.Close() }()
+
+	// Precondition: the bare agent has no tool manager.
+	if ag.GetMCPToolManager() != nil {
+		t.Fatal("Expected nil tool manager initially")
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	serverCfg := echoServerConfig(t)
+	if _, err := ag.AddMCPServer(ctx, "echo", serverCfg); err != nil {
+		t.Fatalf("AddMCPServer failed: %v", err)
+	}
+
+	// Postcondition: adding a server brought a tool manager into existence.
+	if ag.GetMCPToolManager() == nil {
+		t.Fatal("Expected tool manager to be created by AddMCPServer")
+	}
+}
+
+// TestAgent_AddRemoveAdd_MCP runs an add → remove → add cycle on the echo
+// server and checks that the re-added server exposes its tools again.
+func TestAgent_AddRemoveAdd_MCP(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping integration test in short mode")
+	}
+
+	ag := newTestAgent()
+	defer func() { _ = ag.Close() }()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	serverCfg := echoServerConfig(t)
+
+	// First add.
+	if _, err := ag.AddMCPServer(ctx, "echo", serverCfg); err != nil {
+		t.Fatalf("First add failed: %v", err)
+	}
+
+	// Remove.
+	if err := ag.RemoveMCPServer("echo"); err != nil {
+		t.Fatalf("Remove failed: %v", err)
+	}
+
+	// Second add under the same name.
+	readded, err := ag.AddMCPServer(ctx, "echo", serverCfg)
+	if err != nil {
+		t.Fatalf("Re-add failed: %v", err)
+	}
+	if readded != 2 {
+		t.Errorf("Expected 2 tools on re-add, got %d", readded)
+	}
+	if got := ag.GetMCPToolCount(); got != 2 {
+		t.Errorf("Expected 2 MCP tools after re-add, got %d", got)
+	}
+}
+
+// TestAgent_AddMCPServer_InheritsAuthHandler checks that a tool manager
+// created by AddMCPServer() receives the agent's authHandler (fix for
+// issue #3). The tokenStoreFactory is left nil; only auth handler
+// propagation is asserted here.
+func TestAgent_AddMCPServer_InheritsAuthHandler(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping integration test in short mode")
+	}
+
+	authHandler := &mockAuthHandler{redirectURI: "http://localhost:9999/oauth/callback"}
+
+	m := &mockModel{}
+	ag := &Agent{
+		model:        m,
+		maxSteps:     10,
+		systemPrompt: "test",
+		fantasyAgent: fantasy.NewAgent(m),
+		authHandler:  authHandler,
+	}
+	defer func() { _ = ag.Close() }()
+
+	// Precondition: no tool manager yet.
+	if ag.GetMCPToolManager() != nil {
+		t.Fatal("Expected nil tool manager initially")
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	serverCfg := echoServerConfig(t)
+	if _, err := ag.AddMCPServer(ctx, "echo", serverCfg); err != nil {
+		t.Fatalf("AddMCPServer failed: %v", err)
+	}
+
+	// The manager must now exist...
+	manager := ag.GetMCPToolManager()
+	if manager == nil {
+		t.Fatal("Expected tool manager to be created by AddMCPServer")
+	}
+
+	// ...and report a non-nil auth handler through its getter.
+	if manager.GetAuthHandler() == nil {
+		t.Fatal("Expected auth handler to be propagated to tool manager")
+	}
+}
@@ -0,0 +1,109 @@
+package agent
+
+import (
+	"context"
+	"sync"
+	"testing"
+
+	"charm.land/fantasy"
+)
+
+// fakeParallelAgent stands in for a provider that emits two parallel tool_use
+// blocks within one step. Its Stream method replays the configured callbacks
+// in this order:
+//
+//	OnToolCall(A) -> OnToolCall(B) -> OnToolResult(A) -> OnToolResult(B)
+//
+// Before the fix in #33 the agent-layer wrapper kept a single
+// `currentToolArgs` variable that the second OnToolCall overwrote, so both
+// OnToolResult callbacks were handed B's args rather than their own.
+type fakeParallelAgent struct {
+	calls   []fantasy.ToolCallContent
+	results []fantasy.ToolResultContent
+}
+
+// Generate returns an empty result; the non-streaming path is not under test.
+func (f *fakeParallelAgent) Generate(context.Context, fantasy.AgentCall) (*fantasy.AgentResult, error) {
+	return &fantasy.AgentResult{}, nil
+}
+
+// Stream fires OnToolCall for every configured call, then OnToolResult for
+// every configured result, stopping at the first callback error.
+func (f *fakeParallelAgent) Stream(_ context.Context, opts fantasy.AgentStreamCall) (*fantasy.AgentResult, error) {
+	if onCall := opts.OnToolCall; onCall != nil {
+		for _, call := range f.calls {
+			if err := onCall(call); err != nil {
+				return nil, err
+			}
+		}
+	}
+	if onResult := opts.OnToolResult; onResult != nil {
+		for _, res := range f.results {
+			if err := onResult(res); err != nil {
+				return nil, err
+			}
+		}
+	}
+	return &fantasy.AgentResult{}, nil
+}
+
+// TestGenerateWithCallbacks_ParallelToolArgs is the regression test for #33.
+// A fake fantasy.Agent emits two parallel tool calls before either result,
+// exercising the streaming-callback wiring inside GenerateWithCallbacks.
+// Every OnToolResult (and the finishing OnToolExecution) must be handed the
+// args of its own tool call, matched by ToolCallID, rather than the args of
+// the last OnToolCall in the step.
+func TestGenerateWithCallbacks_ParallelToolArgs(t *testing.T) {
+	t.Parallel()
+
+	const (
+		argsA = `{"name":"scheduled_jobs"}`
+		argsB = `{"name":"gmail_trigger"}`
+	)
+
+	provider := &fakeParallelAgent{
+		calls: []fantasy.ToolCallContent{
+			{ToolCallID: "kit-A", ToolName: "load_skill", Input: argsA},
+			{ToolCallID: "kit-B", ToolName: "load_skill", Input: argsB},
+		},
+		results: []fantasy.ToolResultContent{
+			{ToolCallID: "kit-A", ToolName: "load_skill", Result: fantasy.ToolResultOutputContentText{Text: "ok-A"}},
+			{ToolCallID: "kit-B", ToolName: "load_skill", Result: fantasy.ToolResultOutputContentText{Text: "ok-B"}},
+		},
+	}
+
+	// Streaming stays disabled so the "hasCallbacks" branch is what routes
+	// the call onto the streaming path.
+	ag := &Agent{
+		fantasyAgent:     provider,
+		streamingEnabled: false,
+	}
+
+	var mu sync.Mutex
+	resultArgs := map[string]string{}
+	executionArgs := map[string]string{} // only the running == false events
+
+	// record stores args under id in dst while holding mu.
+	record := func(dst map[string]string, id, args string) {
+		mu.Lock()
+		dst[id] = args
+		mu.Unlock()
+	}
+
+	var cb GenerateCallbacks
+	cb.OnToolExecution = func(id, _, args string, running bool) {
+		if !running {
+			record(executionArgs, id, args)
+		}
+	}
+	cb.OnToolResult = func(id, _, args, _, _ string, _ bool) {
+		record(resultArgs, id, args)
+	}
+
+	_, err := ag.GenerateWithCallbacks(context.Background(), nil, cb)
+	if err != nil {
+		t.Fatalf("GenerateWithCallbacks returned error: %v", err)
+	}
+
+	if got, want := resultArgs["kit-A"], argsA; got != want {
+		t.Errorf("OnToolResult for kit-A: args = %q, want %q", got, want)
+	}
+	if got, want := resultArgs["kit-B"], argsB; got != want {
+		t.Errorf("OnToolResult for kit-B: args = %q, want %q", got, want)
+	}
+	if got, want := executionArgs["kit-A"], argsA; got != want {
+		t.Errorf("OnToolExecution(finish) for kit-A: args = %q, want %q", got, want)
+	}
+	if got, want := executionArgs["kit-B"], argsB; got != want {
+		t.Errorf("OnToolExecution(finish) for kit-B: args = %q, want %q", got, want)
+	}
+}
@@ -38,13 +38,26 @@ type AgentCreationOptions struct {
 	DebugLogger tools.DebugLogger // Optional debug logger
 	// AuthHandler handles OAuth authorization for remote MCP servers
 	AuthHandler tools.MCPAuthHandler
+	// TokenStoreFactory, if non-nil, creates a custom token store for each
+	// remote MCP server's OAuth tokens. When nil, the default file-based
+	// token store is used.
+	TokenStoreFactory tools.TokenStoreFactory
 	// CoreTools overrides the default core tool set. If empty, core.AllTools()
 	// is used.
 	CoreTools []fantasy.AgentTool
+	// DisableCoreTools, when true, prevents loading any core tools.
+	// If both DisableCoreTools is true and CoreTools is empty, the agent
+	// will have no tools (useful for simple chat completions).
+	DisableCoreTools bool
 	// ToolWrapper wraps the combined tool list before agent creation.
 	ToolWrapper func([]fantasy.AgentTool) []fantasy.AgentTool
 	// ExtraTools are additional tools to include (e.g. from extensions).
 	ExtraTools []fantasy.AgentTool
+	// OnMCPServerLoaded, if non-nil, is called when each MCP server finishes
+	// loading (successfully or with error). Called from the background goroutine.
+	OnMCPServerLoaded func(serverName string, toolCount int, err error)
+	// MCPTaskConfig configures task-augmented tools/call execution.
+	MCPTaskConfig tools.MCPTaskConfig
 }

 // CreateAgent creates an agent with optional spinner for Ollama models.
@@ -52,16 +65,20 @@ type AgentCreationOptions struct {
 // Returns the created agent or an error if creation fails.
 func CreateAgent(ctx context.Context, opts *AgentCreationOptions) (*Agent, error) {
 	agentConfig := &AgentConfig{
-		ModelConfig:      opts.ModelConfig,
-		MCPConfig:        opts.MCPConfig,
-		SystemPrompt:     opts.SystemPrompt,
-		MaxSteps:         opts.MaxSteps,
-		StreamingEnabled: opts.StreamingEnabled,
-		DebugLogger:      opts.DebugLogger,
-		AuthHandler:      opts.AuthHandler,
-		CoreTools:        opts.CoreTools,
-		ToolWrapper:      opts.ToolWrapper,
-		ExtraTools:       opts.ExtraTools,
+		ModelConfig:       opts.ModelConfig,
+		MCPConfig:         opts.MCPConfig,
+		SystemPrompt:      opts.SystemPrompt,
+		MaxSteps:          opts.MaxSteps,
+		StreamingEnabled:  opts.StreamingEnabled,
+		DebugLogger:       opts.DebugLogger,
+		AuthHandler:       opts.AuthHandler,
+		TokenStoreFactory: opts.TokenStoreFactory,
+		CoreTools:         opts.CoreTools,
+		DisableCoreTools:  opts.DisableCoreTools,
+		ToolWrapper:       opts.ToolWrapper,
+		ExtraTools:        opts.ExtraTools,
+		OnMCPServerLoaded: opts.OnMCPServerLoaded,
+		MCPTaskConfig:     opts.MCPTaskConfig,
 	}

 	var agent *Agent
@@ -0,0 +1,88 @@
+package agent
+
+import (
+	"context"
+	"fmt"
+
+	"charm.land/fantasy"
+
+	"github.com/mark3labs/kit/internal/tools"
+)
+
+// mcpExecutor is the subset of *tools.MCPToolManager that the mcpAgentTool
+// adapter actually uses. Extracted as an interface so the adapter is
+// unit-testable without constructing a full manager + connection pool.
+//
+// ExecuteTool is called with the tool's name and the raw input string of the
+// model's tool call; it returns the tool result or a Go error.
+type mcpExecutor interface {
+	ExecuteTool(ctx context.Context, prefixedName, inputJSON string) (*tools.MCPToolResult, error)
+}
+
+// mcpAgentTool adapts a tools.MCPTool to the fantasy.AgentTool interface.
+// This keeps the fantasy dependency confined to the agent layer — the tools
+// package is a pure MCP client library with no LLM framework dependency.
+type mcpAgentTool struct {
+	// tool supplies the name, description and parameter schema reported by
+	// Info, and the name passed to the executor by Run.
+	tool tools.MCPTool
+	// exec performs the actual tool call on behalf of Run.
+	exec mcpExecutor
+	// providerOptions is the value stored by SetProviderOptions and returned
+	// by ProviderOptions.
+	providerOptions fantasy.ProviderOptions
+}
+
+// Info describes the wrapped MCP tool to fantasy: its name, description,
+// parameter schema and required-parameter list are copied field by field
+// from the underlying tools.MCPTool.
+func (t *mcpAgentTool) Info() fantasy.ToolInfo {
+	var info fantasy.ToolInfo
+	info.Name = t.tool.Name
+	info.Description = t.tool.Description
+	info.Parameters = t.tool.Parameters
+	info.Required = t.tool.Required
+	return info
+}
+
+// Run executes the MCP tool by delegating to the configured mcpExecutor
+// (the MCPToolManager in production).
+//
+// MCP-side failures (JSON-RPC protocol errors, transport failures, schema
+// validation rejections from the server) are surfaced to the model as soft
+// tool errors rather than escalated to a critical agent error. This matches
+// the contract that native Kit tools follow via kit.ErrorResult(...) and
+// lets the model self-correct (e.g. retry with a fixed argument shape) or
+// give up gracefully rather than aborting the turn mid-run.
+//
+// Context cancellation is the one exception: if the caller cancelled the
+// context the turn was aborted intentionally, so we propagate the ctx error
+// to let the agent loop unwind cleanly.
+//
+// An executor that returns neither a result nor an error is treated as a
+// soft tool error as well, instead of dereferencing the nil result.
+func (t *mcpAgentTool) Run(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
+	result, err := t.exec.ExecuteTool(ctx, t.tool.Name, call.Input)
+	if err != nil {
+		// Only a cancelled/expired caller context is critical; every other
+		// executor error is reported back to the model.
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			return fantasy.ToolResponse{}, ctxErr
+		}
+		return fantasy.NewTextErrorResponse(
+			fmt.Sprintf("MCP tool %q failed: %s", t.tool.Name, err.Error()),
+		), nil
+	}
+
+	// The mcpExecutor interface permits (nil, nil); guard so a misbehaving
+	// executor cannot panic the agent loop.
+	if result == nil {
+		return fantasy.NewTextErrorResponse(
+			fmt.Sprintf("MCP tool %q returned no result", t.tool.Name),
+		), nil
+	}
+
+	if result.IsError {
+		return fantasy.NewTextErrorResponse(result.Content), nil
+	}
+	return fantasy.NewTextResponse(result.Content), nil
+}
+
+// ProviderOptions returns the provider-specific options currently stored on
+// this tool, i.e. whatever was last passed to SetProviderOptions.
+func (t *mcpAgentTool) ProviderOptions() fantasy.ProviderOptions {
+	return t.providerOptions
+}
+
+// SetProviderOptions replaces the provider-specific options stored on this
+// tool; the value is returned unchanged by ProviderOptions.
+func (t *mcpAgentTool) SetProviderOptions(opts fantasy.ProviderOptions) {
+	t.providerOptions = opts
+}
+
+// mcpToolsToAgentTools wraps every MCPTool in an mcpAgentTool adapter whose
+// executor is the given manager, preserving input order. The returned slice
+// has the same length as mcpTools.
+func mcpToolsToAgentTools(mcpTools []tools.MCPTool, manager *tools.MCPToolManager) []fantasy.AgentTool {
+	agentTools := make([]fantasy.AgentTool, 0, len(mcpTools))
+	for _, mcpTool := range mcpTools {
+		agentTools = append(agentTools, &mcpAgentTool{tool: mcpTool, exec: manager})
+	}
+	return agentTools
+}
@@ -0,0 +1,158 @@
+package agent
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"testing"
+	"time"
+
+	"charm.land/fantasy"
+
+	"github.com/mark3labs/kit/internal/tools"
+)
+
+// stubExecutor lets each test script the (result, err) pair returned by
+// ExecuteTool. The adapter holds an mcpExecutor interface, so this is the
+// only seam the tests need.
+type stubExecutor struct {
+	// result and err are returned verbatim from every ExecuteTool call.
+	result *tools.MCPToolResult
+	err    error
+	// called is set once ExecuteTool has been invoked; name and input hold
+	// the arguments of the most recent invocation for assertions.
+	called bool
+	name   string
+	input  string
+}
+
+// ExecuteTool implements mcpExecutor. It ignores the context, records that
+// it was called together with the tool name and input it received, and
+// returns the scripted (result, err) pair.
+func (s *stubExecutor) ExecuteTool(_ context.Context, prefixedName, inputJSON string) (*tools.MCPToolResult, error) {
+	s.called = true
+	s.name = prefixedName
+	s.input = inputJSON
+	return s.result, s.err
+}
+
+// newMCPAgentTool builds an adapter around a minimal MCPTool that carries
+// only the given name, wired to the supplied executor.
+func newMCPAgentTool(exec mcpExecutor, name string) *mcpAgentTool {
+	adapter := new(mcpAgentTool)
+	adapter.tool = tools.MCPTool{Name: name}
+	adapter.exec = exec
+	return adapter
+}
+
+// Go errors returned by the executor (JSON-RPC protocol errors, transport
+// failures, schema validation rejections from the MCP server) must be
+// surfaced to the model as soft tool errors so the agent loop can keep
+// going: Run returns a nil error and an IsError response whose content
+// names the tool and carries the underlying error text.
+func TestMCPAgentTool_RPCErrorBecomesSoftError(t *testing.T) {
+	rpcErr := errors.New("MCP error -32602: Invalid params: missing field \"task\"")
+	adapter := newMCPAgentTool(&stubExecutor{err: rpcErr}, "pubmed__search")
+
+	call := fantasy.ToolCall{
+		ID:    "call-1",
+		Name:  "pubmed__search",
+		Input: `{"query":"foo"}`,
+	}
+	resp, err := adapter.Run(context.Background(), call)
+
+	if err != nil {
+		t.Fatalf("expected nil error (soft), got %v", err)
+	}
+	if !resp.IsError {
+		t.Fatalf("expected IsError=true, got false")
+	}
+	if hasName := strings.Contains(resp.Content, "pubmed__search"); !hasName {
+		t.Errorf("expected tool name in error content, got %q", resp.Content)
+	}
+	if hasCode := strings.Contains(resp.Content, "-32602"); !hasCode {
+		t.Errorf("expected underlying error text in content, got %q", resp.Content)
+	}
+}
+
+// A cancelled caller context is the one failure that must stay critical:
+// the caller aborted on purpose, so Run has to return the context error
+// (with an empty response) and let the agent loop unwind instead of
+// burning more steps.
+func TestMCPAgentTool_CtxCancelStaysCritical(t *testing.T) {
+	// Cancel up front so ctx.Err() is already non-nil when Run inspects it.
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	// Real managers typically return ctx.Err() (or a wrapper) when the
+	// context is cancelled mid-call.
+	adapter := newMCPAgentTool(&stubExecutor{err: context.Canceled}, "slow__tool")
+
+	resp, err := adapter.Run(ctx, fantasy.ToolCall{Name: "slow__tool"})
+
+	if !errors.Is(err, context.Canceled) {
+		t.Fatalf("expected context.Canceled, got %v", err)
+	}
+	if resp.IsError || resp.Content != "" {
+		t.Errorf("expected empty response on critical error, got IsError=%v Content=%q", resp.IsError, resp.Content)
+	}
+}
+
+// An expired deadline is handled like cancellation: ctx.Err() is non-nil,
+// so the adapter must return the critical context error instead of turning
+// the executor's error into a soft response.
+func TestMCPAgentTool_CtxDeadlineStaysCritical(t *testing.T) {
+	// A deadline one second in the past yields an already-expired context.
+	expired := time.Now().Add(-time.Second)
+	ctx, cancel := context.WithDeadline(context.Background(), expired)
+	defer cancel()
+
+	adapter := newMCPAgentTool(&stubExecutor{err: context.DeadlineExceeded}, "slow__tool")
+
+	resp, err := adapter.Run(ctx, fantasy.ToolCall{Name: "slow__tool"})
+	if !errors.Is(err, context.DeadlineExceeded) {
+		t.Fatalf("expected context.DeadlineExceeded, got %v", err)
+	}
+	if resp.IsError || resp.Content != "" {
+		t.Errorf("expected empty response on critical error, got IsError=%v Content=%q", resp.IsError, resp.Content)
+	}
+}
+
+// A result the server itself flagged as an error (CallToolResult with
+// isError: true) must still come back as a soft error with its content
+// passed through untouched — this was the existing behavior and must not
+// regress.
+func TestMCPAgentTool_ServerIsErrorRemainsSoftError(t *testing.T) {
+	serverResult := &tools.MCPToolResult{
+		IsError: true,
+		Content: "search service is rate limited; try again in 30s",
+	}
+	adapter := newMCPAgentTool(&stubExecutor{result: serverResult}, "pubmed__search")
+
+	resp, err := adapter.Run(context.Background(), fantasy.ToolCall{Name: "pubmed__search"})
+	if err != nil {
+		t.Fatalf("expected nil error, got %v", err)
+	}
+	if !resp.IsError {
+		t.Fatalf("expected IsError=true, got false")
+	}
+	if resp.Content != "search service is rate limited; try again in 30s" {
+		t.Errorf("expected pass-through content, got %q", resp.Content)
+	}
+}
+
+// Happy path: a successful tool result reaches the caller unchanged, with
+// no Go error and IsError left false.
+func TestMCPAgentTool_SuccessIsPassthrough(t *testing.T) {
+	okResult := &tools.MCPToolResult{
+		IsError: false,
+		Content: `{"hits":3}`,
+	}
+	adapter := newMCPAgentTool(&stubExecutor{result: okResult}, "pubmed__search")
+
+	resp, err := adapter.Run(context.Background(), fantasy.ToolCall{Name: "pubmed__search"})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if resp.IsError {
+		t.Fatalf("expected IsError=false")
+	}
+	if resp.Content != `{"hits":3}` {
+		t.Errorf("expected pass-through content, got %q", resp.Content)
+	}
+}
@@ -2,6 +2,7 @@ package app

 import (
 	"context"
+	"errors"
 	"fmt"
 	"log"
 	"os"
@@ -13,6 +14,7 @@ import (
 	"charm.land/fantasy"

 	"github.com/mark3labs/kit/internal/extensions"
+	"github.com/mark3labs/kit/internal/message"
 	"github.com/mark3labs/kit/internal/session"
 	kit "github.com/mark3labs/kit/pkg/kit"
 )
@@ -23,6 +25,26 @@ type queueItem struct {
 	Files  []kit.LLMFilePart
 }

+// ErrAgentBusy is returned when an operation cannot proceed because the agent
+// is still processing a turn (including any post-turn extension hooks) and did
+// not become idle before the operation's deadline.
+//
+// This is an alias for extensions.ErrAgentBusy so the extension API and the
+// app layer share a single sentinel value — callers can detect the condition
+// with errors.Is(err, app.ErrAgentBusy) without substring-matching the error
+// message.
+var ErrAgentBusy = extensions.ErrAgentBusy
+
+// DefaultNewSessionIdleWait bounds how long RequestNewSessionFromExtension
+// will block waiting for the agent to settle. It needs to be generous enough
+// to cover real-world post-turn tooling (project formatters, on-save linters,
+// hidden tool calls) which routinely hold the busy flag for seconds and
+// occasionally minutes — yet still short enough to surface a wedged agent.
+//
+// Issue #63 reported workloads where the busy window regularly exceeded
+// 6 seconds; ten minutes is the same bound the workaround in that issue used.
+const DefaultNewSessionIdleWait = 10 * time.Minute
+
 // App is the application-layer orchestrator. It owns the agentic loop,
 // conversation history (via MessageStore), and queue management. It is
 // designed to be created once per session and reused across multiple prompts.
@@ -54,11 +76,25 @@ type App struct {
 	// each new step and called by CancelCurrentStep().
 	cancelStep context.CancelFunc

-	// mu protects busy, queue, and cancelStep.
+	// mu protects busy, queue, cancelStep, and idleCh.
 	mu    sync.Mutex
 	busy  bool
 	queue []queueItem

+	// idleCh is closed when the agent transitions from busy back to idle.
+	// While the agent is idle the channel is already closed (recv returns
+	// immediately). When busy transitions to true a fresh open channel is
+	// allocated so callers blocked on the previous one are released. All
+	// transitions are funnelled through setBusyLocked to keep the channel
+	// pointer in sync with the busy flag.
+	//
+	// This is the underlying primitive WaitForIdle and
+	// RequestNewSessionFromExtension wait on to fix the AgentEnd→NewSession
+	// race described in issue #63: AgentEnd is emitted from inside the agent
+	// loop, before drainQueue clears busy, so any extension hook that calls
+	// ctx.NewSession synchronously would otherwise observe busy==true.
+	idleCh chan struct{}
+
 	// wg tracks in-flight goroutines; Close() waits on it.
 	wg sync.WaitGroup

@@ -70,20 +106,34 @@ type App struct {
 	rootCtx    context.Context
 	rootCancel context.CancelFunc

-	// widgetUpdatePending is set to true when a WidgetUpdateEvent has been
-	// sent to the TUI but not yet consumed by its event loop. While the flag
-	// is set, subsequent NotifyWidgetUpdate calls are coalesced (dropped) to
-	// prevent fast extension tickers from flooding the BubbleTea mailbox with
-	// redundant re-render triggers. The flag is cleared after a short debounce
-	// (~1 frame) so new updates are always let through once the TUI has had a
-	// chance to process the pending event.
-	widgetUpdatePending atomic.Bool
+	// widgetUpdatePending is set to true while a WidgetUpdateEvent burst is
+	// being coalesced. The leading edge fires immediately; subsequent calls
+	// within the debounce window set widgetUpdateTrailing so a final event
+	// is delivered with the latest runner state at the end of the window.
+	// Without the trailing send, a rapid SetWidget→RemoveWidget pair (e.g.
+	// SubagentEnd pushing a final frame then removing the widget) would let
+	// the second call get silently dropped, leaving the TUI's layout stuck
+	// on the pre-removal widget height — visible as empty rows below the
+	// status bar after the widget disappears.
+	widgetUpdatePending  atomic.Bool
+	widgetUpdateTrailing atomic.Bool
+
+	// steerDrainFn is the test seam used by releaseBusyAfterCompact to pull
+	// any steer messages that arrived during compaction. In production it is
+	// nil and the helper falls back to a.opts.Kit.DrainSteer(); tests that
+	// need to exercise the steer-drain path without standing up a full
+	// *kit.Kit can set this field directly to inject fake items.
+	steerDrainFn func() []queueItem
 }

 // New creates a new App with the provided options and pre-loaded messages.
 // initialMessages may be nil or empty for a fresh session.
 func New(opts Options, initialMessages []kit.LLMMessage) *App {
 	rootCtx, rootCancel := context.WithCancel(context.Background())
+	// idleCh starts already closed: the freshly constructed App is idle, so
+	// any caller blocking on it via WaitForIdle should be released immediately.
+	idleCh := make(chan struct{})
+	close(idleCh)
 	return &App{
 		opts:       opts,
 		store:      NewMessageStoreWithMessages(initialMessages),
@@ -91,6 +141,90 @@ func New(opts Options, initialMessages []kit.LLMMessage) *App {
 		rootCancel: rootCancel,
 		// cancelStep starts as a no-op so CancelCurrentStep() is always safe.
 		cancelStep: func() {},
+		idleCh:     idleCh,
+	}
+}
+
+// setBusyLocked is the only place a.busy is written. Alongside the flag it
+// maintains idleCh so waiters can block on idle transitions:
+//
+//   - entering busy (false → true) installs a fresh, open channel;
+//   - leaving busy (true → false) closes the current channel, waking every
+//     goroutine receiving from it.
+//
+// Requesting the state the App is already in does nothing. The caller must
+// hold a.mu.
+func (a *App) setBusyLocked(busy bool) {
+	switch {
+	case a.busy == busy:
+		// Already in the requested state; leave idleCh untouched.
+		return
+	case busy:
+		a.busy = true
+		a.idleCh = make(chan struct{})
+	default:
+		a.busy = false
+		close(a.idleCh)
+	}
+}
+
+// idleSnapshot reads the busy flag and the current idle channel together
+// while holding a.mu, so the two values always belong to the same busy
+// cycle: when busy is true, ch is the open channel that will be closed when
+// that cycle ends.
+func (a *App) idleSnapshot() (busy bool, ch chan struct{}) {
+	a.mu.Lock()
+	busy, ch = a.busy, a.idleCh
+	a.mu.Unlock()
+	return busy, ch
+}
+
+// WaitForIdle blocks until the agent is idle, the given timeout elapses, or
+// the app shuts down. Returns nil on idle, ErrAgentBusy on timeout, or the
+// rootCtx error if the app is closing.
+//
+// A non-positive timeout disables the deadline and waits indefinitely (until
+// idle or app shutdown). Safe to call from any goroutine, but never from
+// inside the Bubble Tea Update() loop — it blocks.
+//
+// Idiomatic use from extensions:
+//
+//	if err := app.WaitForIdle(0); err != nil { /* shutdown */ }
+//
+// The loop guards against the agent re-arming itself between wakeups: if
+// another prompt is queued (or a steer message lands) while we're waiting,
+// setBusyLocked allocates a fresh idleCh and we wait again.
+//
+// The deadline is computed once up front, so the timeout bounds the total
+// wait across all loop iterations rather than each individual wakeup.
+func (a *App) WaitForIdle(timeout time.Duration) error {
+	var deadline time.Time
+	if timeout > 0 {
+		deadline = time.Now().Add(timeout)
+	}
+	for {
+		// Idle is checked first, so an already-idle agent returns nil even
+		// if the deadline has passed in the meantime.
+		busy, ch := a.idleSnapshot()
+		if !busy {
+			return nil
+		}
+		// With no timeout, timerCh stays nil; receiving from a nil channel
+		// blocks forever, which removes that case from the select below.
+		var timer *time.Timer
+		var timerCh <-chan time.Time
+		if timeout > 0 {
+			remaining := time.Until(deadline)
+			if remaining <= 0 {
+				return ErrAgentBusy
+			}
+			timer = time.NewTimer(remaining)
+			timerCh = timer.C
+		}
+		select {
+		case <-ch:
+			// Idle transition observed — loop and re-check under the
+			// mutex in case a new busy cycle started immediately after.
+		case <-timerCh:
+			return ErrAgentBusy
+		case <-a.rootCtx.Done():
+			if timer != nil {
+				timer.Stop()
+			}
+			return a.rootCtx.Err()
+		}
+		// Release this iteration's timer before looping; the next pass
+		// creates a new one sized to the remaining time.
+		if timer != nil {
+			timer.Stop()
+		}
+	}
+}

@@ -144,7 +278,7 @@ func (a *App) RunWithFiles(prompt string, files []kit.LLMFilePart) int {
 		return qLen
 	}

-	a.busy = true
+	a.setBusyLocked(true)
 	a.wg.Add(1)
 	a.mu.Unlock()
 	go a.drainQueue(item)
@@ -224,7 +358,7 @@ func (a *App) SteerWithFiles(prompt string, files []kit.LLMFilePart) int {
 	if !a.busy {
 		// Not busy — start immediately, same as RunWithFiles().
 		item := queueItem{Prompt: prompt, Files: files}
-		a.busy = true
+		a.setBusyLocked(true)
 		a.wg.Add(1)
 		a.mu.Unlock()
 		go a.drainQueue(item)
@@ -260,7 +394,7 @@ func (a *App) InterruptAndSend(prompt string) {

 	if !a.busy {
 		// Not busy — start immediately, same as Run().
-		a.busy = true
+		a.setBusyLocked(true)
 		a.wg.Add(1)
 		a.mu.Unlock()
 		go a.drainQueue(item)
@@ -333,6 +467,90 @@ func (a *App) SwitchTreeSession(ts *session.TreeManager) {
 	}
 }

+// PopLastUserMessage truncates the tree session back to the parent of the
+// most recent user message on the current branch, syncs the in-memory
+// message store, and returns the user prompt text plus any image file
+// parts so the caller can resubmit via Run/RunWithFiles.
+//
+// This is the building block for /retry: the user message and any orphaned
+// assistant/tool entries produced by a failed turn become unreachable on
+// the current branch (they remain in the session file under a different
+// leaf) and are excluded from the next LLM context.
+//
+// Returns an error when:
+//   - the agent is currently working (busy)
+//   - the app has been closed
+//   - no tree session is active (sessions disabled via --no-session)
+//   - no user message exists on the current branch
+//   - the user message cannot be decoded, or branching to its parent fails
+//
+// On any error the returned prompt is empty and the file slice is nil.
+//
+// Satisfies ui.AppController.
+func (a *App) PopLastUserMessage() (string, []kit.LLMFilePart, error) {
+	// NOTE(review): closed/busy are only checked here; a.mu is released
+	// before the branch and store are modified below, so a turn started
+	// concurrently after this check is not excluded — confirm callers
+	// serialize /retry against Run.
+	a.mu.Lock()
+	if a.closed {
+		a.mu.Unlock()
+		return "", nil, fmt.Errorf("app is closed")
+	}
+	if a.busy {
+		a.mu.Unlock()
+		return "", nil, fmt.Errorf("cannot retry while the agent is working")
+	}
+	a.mu.Unlock()
+
+	ts := a.opts.TreeSession
+	if ts == nil {
+		return "", nil, fmt.Errorf("no tree session active; /retry requires a session")
+	}
+
+	// Walk the current branch backwards to find the most recent user message.
+	// Entries that are not *session.MessageEntry are skipped.
+	branch := ts.GetBranch("")
+	var target *session.MessageEntry
+	for i := len(branch) - 1; i >= 0; i-- {
+		me, ok := branch[i].(*session.MessageEntry)
+		if !ok {
+			continue
+		}
+		if me.Role == string(message.RoleUser) {
+			target = me
+			break
+		}
+	}
+	if target == nil {
+		return "", nil, fmt.Errorf("no user message to retry")
+	}
+
+	// Extract the prompt text and any image parts from the target entry.
+	// Only message.ImageContent parts are converted to file parts; other
+	// part types are not carried over as files.
+	msg, err := target.ToMessage()
+	if err != nil {
+		return "", nil, fmt.Errorf("decode user message: %w", err)
+	}
+	prompt := msg.Content()
+	var files []kit.LLMFilePart
+	for _, part := range msg.Parts {
+		if ic, ok := part.(message.ImageContent); ok {
+			files = append(files, kit.LLMFilePart{
+				Data:      ic.Data,
+				MediaType: ic.MediaType,
+			})
+		}
+	}
+
+	// Move the leaf to the parent of the user message. The failed turn's
+	// entries (user message + any partial assistant/tool entries) are still
+	// in the tree file but no longer on the active branch, so they will not
+	// be re-sent to the LLM. runTurn() will append a fresh user message on
+	// the next call.
+	if err := ts.Branch(target.ParentID); err != nil {
+		return "", nil, fmt.Errorf("branch to parent: %w", err)
+	}
+
+	// Sync the in-memory store with the new branch position so subsequent
+	// reads (and ReloadMessagesFromTree() consumers) see the truncated view.
+	a.store.Clear()
+	a.store.Replace(ts.GetLLMMessages())
+
+	return prompt, files, nil
+}
+
 // AddContextMessage adds a user-role message to the conversation history
 // without triggering an LLM response. Used by the ! shell command prefix
 // to inject command output into context so the LLM can reference it in
@@ -356,6 +574,10 @@ func (a *App) AddContextMessage(text string) {
 // tea.Program. customInstructions is optional text appended to the summary
 // prompt (e.g. "Focus on the API design decisions").
 //
+// Any prompts queued via Run/RunWithFiles or steering messages injected via
+// Steer/SteerWithFiles while compaction is running are flushed automatically
+// once compaction completes (see releaseBusyAfterCompact).
+//
 // Satisfies ui.AppController.
 func (a *App) CompactConversation(customInstructions string) error {
 	a.mu.Lock()
@@ -371,17 +593,13 @@ func (a *App) CompactConversation(customInstructions string) error {
 		a.mu.Unlock()
 		return fmt.Errorf("SDK instance not available")
 	}
-	a.busy = true
+	a.setBusyLocked(true)
 	a.wg.Add(1)
 	a.mu.Unlock()

 	go func() {
 		defer a.wg.Done()
-		defer func() {
-			a.mu.Lock()
-			a.busy = false
-			a.mu.Unlock()
-		}()
+		defer a.releaseBusyAfterCompact()

 		// Subscribe to SDK events for streaming compaction summary to the TUI.
 		sendFn := func(msg tea.Msg) {
@@ -420,6 +638,9 @@ func (a *App) CompactConversation(customInstructions string) error {
 // CompactAsync is like CompactConversation but calls onComplete/onError
 // callbacks instead of sending TUI events. Used by the extension API's
 // ctx.Compact() which needs callback-based notification.
+//
+// Like CompactConversation, any prompts/steer messages received during
+// compaction are flushed automatically once compaction finishes.
 func (a *App) CompactAsync(customInstructions string, onComplete func(), onError func(string)) error {
 	a.mu.Lock()
 	if a.closed {
@@ -434,17 +655,13 @@ func (a *App) CompactAsync(customInstructions string, onComplete func(), onError
 		a.mu.Unlock()
 		return fmt.Errorf("SDK instance not available")
 	}
-	a.busy = true
+	a.setBusyLocked(true)
 	a.wg.Add(1)
 	a.mu.Unlock()

 	go func() {
 		defer a.wg.Done()
-		defer func() {
-			a.mu.Lock()
-			a.busy = false
-			a.mu.Unlock()
-		}()
+		defer a.releaseBusyAfterCompact()

 		// Subscribe to SDK events for streaming compaction summary to the TUI.
 		sendFn := func(msg tea.Msg) {
@@ -489,6 +706,81 @@ func (a *App) CompactAsync(customInstructions string, onComplete func(), onError
 	return nil
 }

+// releaseBusyAfterCompact runs as a deferred call at the end of every
+// compaction goroutine, so it executes however that goroutine exits. It
+// flips a.busy back to false, but before doing so it checks whether any
+// prompts piled up while compaction was running:
+//
+//   - Run/RunWithFiles append to a.queue when a.busy is set.
+//   - Steer/SteerWithFiles deposit messages into the SDK steer channel via
+//     Kit.InjectSteerWithFiles when a.busy is set.
+//
+// Without this hand-off the queue would sit idle until the user submits
+// another prompt — see issue #27. If we find anything pending we keep busy
+// set, splice the steer messages to the front of the queue, and start a
+// fresh drainQueue goroutine to deliver them as a single batched turn.
+func (a *App) releaseBusyAfterCompact() {
+	// Pull steer messages outside the app mutex; DrainSteer takes its own
+	// internal lock and we don't want to nest the two. The test seam
+	// (a.steerDrainFn) takes precedence so unit tests can inject fake
+	// steer items without a real *kit.Kit.
+	var steerItems []queueItem
+	switch {
+	case a.steerDrainFn != nil:
+		steerItems = a.steerDrainFn()
+	case a.opts.Kit != nil:
+		if leftover := a.opts.Kit.DrainSteer(); len(leftover) > 0 {
+			steerItems = make([]queueItem, len(leftover))
+			for i, sm := range leftover {
+				steerItems[i] = queueItem{Prompt: sm.Text, Files: sm.Files}
+			}
+		}
+	}
+
+	a.mu.Lock()
+	// If the app was closed while compaction was running, drop everything
+	// and just clear busy. Run/Steer would have rejected new items already
+	// after Close(), but this guards against in-flight items that slipped
+	// in just before closed was set. Steer items drained above are
+	// discarded on this path too.
+	if a.closed {
+		a.queue = a.queue[:0]
+		a.setBusyLocked(false)
+		a.mu.Unlock()
+		return
+	}
+
+	// Combine steer-channel items (front) with the in-memory queue (back).
+	// Steer messages are placed first so they retain their "act now"
+	// semantics relative to ordinary queued prompts that arrived later.
+	// The append targets steerItems (or a fresh array when it is nil), so
+	// pending does not share a.queue's backing array and truncating a.queue
+	// on the next line cannot disturb it — presumably also true for slices
+	// returned by the steerDrainFn test seam; verify if that changes.
+	pending := append(steerItems, a.queue...)
+	a.queue = a.queue[:0]
+
+	if len(pending) == 0 {
+		a.setBusyLocked(false)
+		a.mu.Unlock()
+		return
+	}
+
+	// Hand off to drainQueue: it will pick up the first item directly and
+	// scoop the rest from a.queue on its first iteration.
+	first := pending[0]
+	if len(pending) > 1 {
+		a.queue = append(a.queue, pending[1:]...)
+	}
+	// Stay busy across the goroutine swap: busy is deliberately left set,
+	// and the WaitGroup is bumped under the lock before the new goroutine
+	// starts.
+	a.wg.Add(1)
+	a.mu.Unlock()
+
+	// Notify the UI that steer-channel messages were consumed so the
+	// steering badge can clear; ordinary queued prompts will be reflected
+	// by the QueueUpdatedEvent that drainQueue emits as it picks them up.
+	if len(steerItems) > 0 {
+		a.sendEvent(SteerConsumedEvent{})
+	}
+
+	go a.drainQueue(first)
+}
+
 // --------------------------------------------------------------------------
 // Non-interactive execution
 // --------------------------------------------------------------------------
@@ -497,6 +789,12 @@ func (a *App) CompactAsync(customInstructions string, onComplete func(), onError
 // response text to stdout. No intermediate events are emitted. Blocks until
 // the step completes or ctx is cancelled.
 func (a *App) RunOnce(ctx context.Context, prompt string) error {
+	return a.RunOnceWithFiles(ctx, prompt, nil)
+}
+
+// RunOnceWithFiles executes a single agent step synchronously with optional
+// multimodal file attachments. Prints the response to stdout and returns.
+func (a *App) RunOnceWithFiles(ctx context.Context, prompt string, files []kit.LLMFilePart) error {
 	stepCtx, cancel := context.WithCancel(ctx)
 	defer cancel()

@@ -504,7 +802,7 @@ func (a *App) RunOnce(ctx context.Context, prompt string) error {
 	a.cancelStep = cancel
 	a.mu.Unlock()

-	result, err := a.executeStep(stepCtx, prompt, nil, nil)
+	result, err := a.executeStep(stepCtx, prompt, nil, files)
 	if err != nil {
 		return err
 	}
@@ -519,6 +817,12 @@ func (a *App) RunOnce(ctx context.Context, prompt string) error {
 // full TurnResult without printing anything. This is used by --json mode to
 // capture structured output for serialization.
 func (a *App) RunOnceResult(ctx context.Context, prompt string) (*kit.TurnResult, error) {
+	return a.RunOnceResultWithFiles(ctx, prompt, nil)
+}
+
+// RunOnceResultWithFiles executes a single agent step synchronously with
+// optional multimodal file attachments and returns the full TurnResult.
+func (a *App) RunOnceResultWithFiles(ctx context.Context, prompt string, files []kit.LLMFilePart) (*kit.TurnResult, error) {
 	stepCtx, cancel := context.WithCancel(ctx)
 	defer cancel()

@@ -526,7 +830,7 @@ func (a *App) RunOnceResult(ctx context.Context, prompt string) (*kit.TurnResult
 	a.cancelStep = cancel
 	a.mu.Unlock()

-	return a.executeStep(stepCtx, prompt, nil, nil)
+	return a.executeStep(stepCtx, prompt, nil, files)
 }

 // RunOnceWithDisplay executes a single agent step synchronously, sending
@@ -540,6 +844,12 @@ func (a *App) RunOnceResult(ctx context.Context, prompt string) (*kit.TurnResult
 //
 // Blocks until the step completes or ctx is cancelled.
 func (a *App) RunOnceWithDisplay(ctx context.Context, prompt string, eventFn func(tea.Msg)) error {
+	return a.RunOnceWithDisplayAndFiles(ctx, prompt, eventFn, nil)
+}
+
+// RunOnceWithDisplayAndFiles executes a single agent step synchronously with
+// optional multimodal file attachments, sending intermediate display events.
+func (a *App) RunOnceWithDisplayAndFiles(ctx context.Context, prompt string, eventFn func(tea.Msg), files []kit.LLMFilePart) error {
 	stepCtx, cancel := context.WithCancel(ctx)
 	defer cancel()

@@ -547,7 +857,7 @@ func (a *App) RunOnceWithDisplay(ctx context.Context, prompt string, eventFn fun
 	a.cancelStep = cancel
 	a.mu.Unlock()

-	result, err := a.executeStep(stepCtx, prompt, eventFn, nil)
+	result, err := a.executeStep(stepCtx, prompt, eventFn, files)
 	if err != nil {
 		return err
 	}
@@ -663,7 +973,7 @@ func (a *App) drainQueue(first queueItem) {

 	// Mark as no longer busy
 	a.mu.Lock()
-	a.busy = false
+	a.setBusyLocked(false)
 	a.mu.Unlock()
 }

@@ -870,6 +1180,12 @@ func (a *App) subscribeSDKEvents(sendFn func(tea.Msg), stepUsageSeen *atomic.Boo
 		switch ev := e.(type) {
 		case kit.ToolCallEvent:
 			sendFn(ToolCallStartedEvent{ToolCallID: ev.ToolCallID, ToolName: ev.ToolName, ToolArgs: ev.ToolArgs})
+		case kit.ToolCallStartEvent:
+			sendFn(ToolCallInputStartEvent{ToolCallID: ev.ToolCallID, ToolName: ev.ToolName, ToolKind: ev.ToolKind})
+		case kit.ToolCallDeltaEvent:
+			sendFn(ToolCallInputDeltaEvent{ToolCallID: ev.ToolCallID, Delta: ev.Delta})
+		case kit.ToolCallEndEvent:
+			sendFn(ToolCallInputEndEvent{ToolCallID: ev.ToolCallID})
 		case kit.ToolExecutionStartEvent:
 			sendFn(ToolExecutionEvent{ToolCallID: ev.ToolCallID, ToolName: ev.ToolName, ToolArgs: ev.ToolArgs, IsStarting: true})
 		case kit.ToolExecutionEndEvent:
@@ -899,7 +1215,23 @@ func (a *App) subscribeSDKEvents(sendFn func(tea.Msg), stepUsageSeen *atomic.Boo
 		case kit.SteerConsumedEvent:
 			sendFn(SteerConsumedEvent{})
 		case kit.StepUsageEvent:
-			a.recordStepUsage(ev, stepUsageSeen)
+			a.recordStepUsage(ev, stepUsageSeen, sendFn)
+		case kit.PasswordPromptEvent:
+			// Convert SDK PasswordPromptEvent to app PasswordPromptEvent
+			// The TUI will handle this and send the response back
+			responseCh := make(chan PasswordPromptResponse, 1)
+			sendFn(PasswordPromptEvent{
+				Prompt:     ev.Prompt,
+				ResponseCh: responseCh,
+			})
+			// Wait for TUI response and forward to SDK
+			resp := <-responseCh
+			ev.ResponseCh <- kit.PasswordPromptResponse{
+				Password:  resp.Password,
+				Cancelled: resp.Cancelled,
+			}
+		case kit.TurnEndEvent:
+			a.handleTurnEnd(ev, sendFn)
 		}
 	}))

@@ -910,6 +1242,64 @@ func (a *App) subscribeSDKEvents(sendFn func(tea.Msg), stepUsageSeen *atomic.Boo
 	}
 }

+// handleTurnEnd inspects a turn's final StopReason and surfaces actionable
+// feedback to the user when the turn ended in a state they can act on.
+//
+// Today the only surfaced case is FinishReasonLength — the model hit its
+// configured max_output_tokens budget and the reply was truncated. Without
+// this banner the TUI used to swallow the truncation silently, leading to
+// "ghost" cut-offs with no indication of why.
+//
+// Separated from subscribeSDKEvents so tests can exercise it directly via a
+// stubbed sendFn without standing up a full Kit.
+func (a *App) handleTurnEnd(ev kit.TurnEndEvent, sendFn func(tea.Msg)) {
+	if sendFn == nil {
+		return
+	}
+	if ev.StopReason != kit.FinishReasonLength {
+		return
+	}
+	sendFn(ExtensionPrintEvent{
+		Level: "info",
+		Text:  a.formatMaxTokensTruncatedMessage(),
+	})
+}
+
+// formatMaxTokensTruncatedMessage builds the user-facing explanation for a
+// truncated turn. It reports the active max_output_tokens budget and, when the
+// model's catalog output ceiling is known and higher than that budget, the
+// ceiling too, so the user can judge how much headroom is available.
+func (a *App) formatMaxTokensTruncatedMessage() string {
+	k := a.opts.Kit
+	if k == nil {
+		// Extremely early / test-stub case: still emit a useful generic hint.
+		return "⚠ Response truncated: the model hit the configured max_output_tokens limit. " +
+			"Raise it with --max-tokens N, KIT_MAX_TOKENS=N, or per-model " +
+			"modelSettings[provider/model].maxTokens in config."
+	}
+	current := k.MaxTokens()
+	ceiling := k.MaxOutputLimit()
+	model := k.GetModelString()
+
+	msg := "⚠ Response truncated: "
+	if model != "" {
+		msg += fmt.Sprintf("%s hit the configured max_output_tokens limit", model)
+	} else {
+		msg += "the model hit the configured max_output_tokens limit"
+	}
+	if current > 0 {
+		msg += fmt.Sprintf(" (%d)", current)
+	}
+	msg += "."
+	if ceiling > 0 && current > 0 && ceiling > current {
+		msg += fmt.Sprintf(" This model supports up to %d output tokens.", ceiling)
+	}
+	msg += "\n\nRaise it with --max-tokens N, KIT_MAX_TOKENS=N, " +
+		"or per-model modelSettings[provider/model].maxTokens in your config. " +
+		"Re-run the last prompt after raising it to get the full response."
+	return msg
+}
+
 // QuitFromExtension triggers a graceful shutdown. In interactive mode it
 // sends a tea.QuitMsg to the program so the TUI exits cleanly. In
 // non-interactive mode it cancels the root context, stopping any in-flight
@@ -930,7 +1320,8 @@ func (a *App) QuitFromExtension() {
 // controls styling: "" for plain text, "info" for a system message block,
 // "error" for an error block. In interactive mode it sends an
 // ExtensionPrintEvent through the program so the TUI can render it with the
-// appropriate renderer. In non-interactive mode it falls back to stdout.
+// appropriate renderer. In non-interactive mode "error" and "info" messages go
+// to stderr with a level prefix; plain ("") messages are still written to stdout.
 func (a *App) PrintFromExtension(level, text string) {
 	a.mu.Lock()
 	prog := a.program
@@ -939,8 +1330,16 @@ func (a *App) PrintFromExtension(level, text string) {
 		prog.Send(ExtensionPrintEvent{Text: text, Level: level})
 		return
 	}
-	// Non-interactive fallback: write directly to stdout.
-	fmt.Println(text)
+	// Non-interactive fallback: "error" and "info" go to stderr with a level
+	// prefix; any other level is printed unprefixed to stdout as before.
+	switch level {
+	case "error":
+		fmt.Fprintf(os.Stderr, "[ERROR] %s\n", text)
+	case "info":
+		fmt.Fprintf(os.Stderr, "[INFO] %s\n", text)
+	default:
+		fmt.Println(text)
+	}
 }

 // SetEditorTextFromExtension sends an EditorTextSetEvent to the TUI to
@@ -954,6 +1353,42 @@ func (a *App) SetEditorTextFromExtension(text string) {
 	}
 }

+// RequestNewSessionFromExtension sends a NewSessionRequestEvent to the TUI
+// to end the current session and start a fresh one. If initialPrompt is
+// non-empty it is submitted as the first user turn of the new session.
+//
+// If the agent is currently busy (e.g. the caller is an OnAgentEnd hook that
+// fires before drainQueue clears the busy flag, or there are queued prompts
+// still being processed) the call blocks until the agent becomes idle, up to
+// DefaultNewSessionIdleWait. If that deadline elapses, ErrAgentBusy is
+// returned and callers can detect it with errors.Is. This wait-then-send
+// behavior fixes the v0.79.0 phase-handoff race documented in issue #63.
+//
+// Returns an error when running headless (no TUI attached), when the wait
+// for idle times out (ErrAgentBusy), when the app is shutting down, or when
+// a BeforeSessionSwitch extension hook cancels the switch.
+//
+// This is the implementation behind ctx.NewSession(prompt) for the
+// interactive TUI. It blocks the caller until the TUI processes the
+// switch, so it must be invoked from a goroutine outside Update().
+func (a *App) RequestNewSessionFromExtension(initialPrompt string) error {
+	a.mu.Lock()
+	prog := a.program
+	a.mu.Unlock()
+	if prog == nil {
+		return fmt.Errorf("new session unavailable: no interactive TUI attached")
+	}
+	if err := a.WaitForIdle(DefaultNewSessionIdleWait); err != nil {
+		if errors.Is(err, ErrAgentBusy) {
+			return fmt.Errorf("cannot start new session: %w", err)
+		}
+		return err
+	}
+	ch := make(chan error, 1)
+	prog.Send(NewSessionRequestEvent{InitialPrompt: initialPrompt, ResponseCh: ch})
+	return <-ch
+}
+
 // NotifyModelChanged sends a ModelChangedEvent to the TUI so it updates
 // the model name in the status bar and message attribution.
 func (a *App) NotifyModelChanged(provider, model string) {
@@ -969,32 +1404,47 @@ func (a *App) NotifyModelChanged(provider, model string) {
 // extension widgets. Called from the extension context's SetWidget/RemoveWidget
 // closures. In non-interactive mode this is a no-op (widgets are TUI-only).
 //
-// Coalescing: if a WidgetUpdateEvent is already queued and not yet consumed
-// by the TUI event loop, additional calls within the same ~16 ms window are
-// dropped. This prevents fast extension tickers from flooding BubbleTea's
-// mailbox with redundant re-render triggers.
+// Coalescing (leading + trailing edge): the first call in an idle period
+// fires immediately for responsiveness. Subsequent calls within a ~16 ms
+// debounce window are batched into a single trailing event delivered at
+// the end of the window. The trailing send is essential for correctness:
+// extensions routinely make tight SetWidget→RemoveWidget pairs (e.g. on
+// SubagentEnd) and silently dropping the second call would leave the TUI's
+// layout stuck on stale widget dimensions until some other event happens
+// to trigger a re-render.
 func (a *App) NotifyWidgetUpdate() {
-	// Coalesce: only one pending update at a time.
 	if !a.widgetUpdatePending.CompareAndSwap(false, true) {
+		// A leading-edge event is already in flight — mark that the runner
+		// state has changed again so the trailing send below picks it up.
+		a.widgetUpdateTrailing.Store(true)
 		return
 	}
 	a.mu.Lock()
 	prog := a.program
 	a.mu.Unlock()
-	if prog != nil {
-		prog.Send(WidgetUpdateEvent{})
-		// Reset the pending flag after a short debounce so subsequent calls
-		// within the same render cycle are also coalesced, but new updates
-		// after the cycle are allowed through.
-		go func() {
-			time.Sleep(16 * time.Millisecond) // ~1 frame at 60 fps
-			a.widgetUpdatePending.Store(false)
-		}()
-	} else {
+	if prog == nil {
 		// No program registered (non-interactive mode); clear the flag so
 		// future calls are never permanently blocked.
 		a.widgetUpdatePending.Store(false)
+		return
 	}
+	prog.Send(WidgetUpdateEvent{})
+	go func() {
+		time.Sleep(16 * time.Millisecond) // ~1 frame at 60 fps
+		// If any extra calls came in during the debounce window, deliver
+		// one trailing event so the TUI sees the latest widget state. Swap
+		// reads and clears the flag atomically. NOTE(review): a call landing
+		// between this Swap and the Store below is held until the next call.
+		if a.widgetUpdateTrailing.Swap(false) {
+			a.mu.Lock()
+			p := a.program
+			a.mu.Unlock()
+			if p != nil {
+				p.Send(WidgetUpdateEvent{})
+			}
+		}
+		a.widgetUpdatePending.Store(false)
+	}()
 }

 // NotifyContentReload sends a ContentReloadEvent to the TUI so it refreshes
@@ -1010,6 +1460,34 @@ func (a *App) NotifyContentReload() {
 	}
 }

+// NotifyMCPToolsReady sends an MCPToolsReadyEvent to the TUI so it refreshes
+// tool names and MCP tool count from provider callbacks. Called when background
+// MCP tool loading completes. In non-interactive mode this is a no-op.
+func (a *App) NotifyMCPToolsReady() {
+	a.mu.Lock()
+	prog := a.program
+	a.mu.Unlock()
+	if prog != nil {
+		prog.Send(MCPToolsReadyEvent{})
+	}
+}
+
+// NotifyMCPServerLoaded sends an MCPServerLoadedEvent to the TUI so it can
+// display a system message when a single MCP server finishes loading. Called
+// per server as background MCP tool loading progresses.
+func (a *App) NotifyMCPServerLoaded(serverName string, toolCount int, err error) {
+	a.mu.Lock()
+	prog := a.program
+	a.mu.Unlock()
+	if prog != nil {
+		prog.Send(MCPServerLoadedEvent{
+			ServerName: serverName,
+			ToolCount:  toolCount,
+			Error:      err,
+		})
+	}
+}
+
 // SendEvent sends a tea.Msg to the registered program. Safe to call from
 // any goroutine. No-op when no program is registered.
 //
@@ -1094,18 +1572,28 @@ func (a *App) PrintBlockFromExtension(opts extensions.PrintBlockOpts) {
 		})
 		return
 	}
-	// Non-interactive fallback.
+	// Non-interactive fallback: render a simple framed block to stderr so
+	// it is visually distinct from plain stdout output.
 	if opts.Subtitle != "" {
-		fmt.Printf("%s\n  — %s\n", opts.Text, opts.Subtitle)
+		fmt.Fprintf(os.Stderr, "--- %s ---\n%s\n", opts.Subtitle, opts.Text)
 	} else {
-		fmt.Println(opts.Text)
+		fmt.Fprintf(os.Stderr, "---\n%s\n---\n", opts.Text)
 	}
 }

 // recordStepUsage applies token/cost usage reported for a completed step.
 // Step usage events arrive even when a turn is later cancelled, so this keeps
 // the usage widget accurate on all stop paths.
-func (a *App) recordStepUsage(ev kit.StepUsageEvent, stepUsageSeen *atomic.Bool) {
+//
+// Both session totals (cost, token counts) and the context window fill level
+// are updated here so the status bar reflects progress after every LLM call,
+// not just at the end of the full turn. Context fill monotonically increases
+// across steps because each step re-sends the entire conversation plus any
+// new tool results, so the numbers only go up.
+//
+// sendFn is called with a UsageUpdatedEvent to trigger a TUI re-render so
+// the updated values are visible immediately.
+func (a *App) recordStepUsage(ev kit.StepUsageEvent, stepUsageSeen *atomic.Bool, sendFn func(tea.Msg)) {
 	hasUsage := ev.InputTokens > 0 || ev.OutputTokens > 0 || ev.CacheReadTokens > 0 || ev.CacheWriteTokens > 0
 	if a.opts.Debug {
 		log.Printf("[DEBUG] recordStepUsage: hasUsage=%v input=%d output=%d cacheRead=%d cacheWrite=%d",
@@ -1126,10 +1614,21 @@ func (a *App) recordStepUsage(ev kit.StepUsageEvent, stepUsageSeen *atomic.Bool)
 		int(ev.CacheReadTokens),
 		int(ev.CacheWriteTokens),
 	)
-	// NOTE: We do NOT call SetContextTokens here. Context fill is set once
-	// at turn completion via updateUsageFromTurnResult using FinalUsage.InputTokens,
-	// which reflects the full accumulated context. Per-step context tokens would
-	// cause the display to jump around during multi-step tool calls.
+	// Update context window fill from this step's usage. Each step sends
+	// the full conversation to the LLM, so the reported token counts
+	// represent the actual context utilization at that point.
+	contextFill := int(ev.InputTokens) + int(ev.CacheReadTokens) + int(ev.CacheWriteTokens) + int(ev.OutputTokens)
+	if contextFill > 0 {
+		if a.opts.Debug {
+			log.Printf("[DEBUG] recordStepUsage: SetContextTokens=%d (Input=%d + CacheRead=%d + CacheWrite=%d + Output=%d)",
+				contextFill, ev.InputTokens, ev.CacheReadTokens, ev.CacheWriteTokens, ev.OutputTokens)
+		}
+		a.opts.UsageTracker.SetContextTokens(contextFill)
+	}
+	// Notify the TUI so it re-renders the status bar with updated values.
+	if sendFn != nil {
+		sendFn(UsageUpdatedEvent{})
+	}
 }

 // updateUsageFromTurnResult records token usage from an SDK TurnResult into the
@@ -1193,15 +1692,30 @@ func (a *App) updateUsageFromTurnResult(result *kit.TurnResult, userPrompt strin
 	}

 	// --- Context window fill (drives the % bar) ---
-	// Use FinalUsage.InputTokens as the context window fill. The API's InputTokens
-	// already includes the full conversation history (system prompt + all previous
-	// messages + current user message). Adding OutputTokens would double-count since
-	// the output becomes part of the input for the next turn.
-	if result.FinalUsage != nil && result.FinalUsage.InputTokens > 0 {
-		if a.opts.Debug {
-			log.Printf("[DEBUG] updateUsageFromTurnResult: calling SetContextTokens=%d (FinalUsage.InputTokens)",
-				result.FinalUsage.InputTokens)
+	// Calculate context fill from the LAST API call's usage. The context
+	// window is filled by everything sent to and received from the model:
+	//
+	//   InputTokens       — non-cached input (may be small with prompt caching)
+	//   CacheReadTokens   — input tokens served from cache
+	//   CacheCreationTokens — input tokens written to cache this call
+	//   OutputTokens      — assistant output (becomes input next turn)
+	//
+	// With Anthropic prompt caching, InputTokens can drop to near-zero while
+	// CacheReadTokens holds the bulk of the context. We must sum all four to
+	// get the true context window utilization.
+	//
+	// We use FinalUsage (last step only), NOT TotalUsage, because TotalUsage
+	// sums across all tool-calling steps — and each step re-sends the full
+	// conversation, so TotalUsage massively overstates the actual window fill.
+	if result.FinalUsage != nil {
+		u := result.FinalUsage
+		contextFill := int(u.InputTokens) + int(u.CacheReadTokens) + int(u.CacheCreationTokens) + int(u.OutputTokens)
+		if contextFill > 0 {
+			if a.opts.Debug {
+				log.Printf("[DEBUG] updateUsageFromTurnResult: SetContextTokens=%d (Input=%d + CacheRead=%d + CacheCreate=%d + Output=%d)",
+					contextFill, u.InputTokens, u.CacheReadTokens, u.CacheCreationTokens, u.OutputTokens)
+			}
+			a.opts.UsageTracker.SetContextTokens(contextFill)
 		}
-		a.opts.UsageTracker.SetContextTokens(int(result.FinalUsage.InputTokens))
 	}
 }
@@ -3,11 +3,16 @@ package app
 import (
 	"context"
 	"errors"
+	"strings"
 	"sync"
 	"testing"
 	"time"

+	tea "charm.land/bubbletea/v2"
+	"charm.land/fantasy"
 	kit "github.com/mark3labs/kit/pkg/kit"
+
+	"github.com/mark3labs/kit/internal/session"
 )

 // --------------------------------------------------------------------------
@@ -532,9 +537,9 @@ func TestQueueLength_reflects(t *testing.T) {
 }

 // TestRecordStepUsage_updatesTracker verifies that per-step usage updates are
-// recorded immediately for cost tracking. Context tokens are NOT updated here
-// (only via updateUsageFromTurnResult) to avoid display jumps during multi-step
-// tool calls.
+// recorded immediately for cost tracking. Context tokens are also updated so
+// the status bar reflects context fill after every LLM call in a multi-step
+// turn, not just at the end.
 func TestRecordStepUsage_updatesTracker(t *testing.T) {
 	usage := &usageUpdaterStub{}
 	app := New(Options{UsageTracker: usage}, nil)
@@ -545,7 +550,7 @@ func TestRecordStepUsage_updatesTracker(t *testing.T) {
 		OutputTokens:     45,
 		CacheReadTokens:  5,
 		CacheWriteTokens: 2,
-	}, nil)
+	}, nil, nil)

 	usage.mu.Lock()
 	defer usage.mu.Unlock()
@@ -557,9 +562,13 @@ func TestRecordStepUsage_updatesTracker(t *testing.T) {
 		t.Fatalf("unexpected usage update payload: in=%d out=%d cache_read=%d cache_write=%d",
 			usage.lastUpdateInput, usage.lastUpdateOutput, usage.lastUpdateCacheRead, usage.lastUpdateCacheWrite)
 	}
-	// Context tokens should NOT be updated by recordStepUsage (only by updateUsageFromTurnResult)
-	if usage.contextCalls != 0 {
-		t.Fatalf("expected 0 context token updates from recordStepUsage, got %d", usage.contextCalls)
+	// Context tokens should now be updated per-step (Input + CacheRead + CacheWrite + Output).
+	if usage.contextCalls != 1 {
+		t.Fatalf("expected 1 context token update from recordStepUsage, got %d", usage.contextCalls)
+	}
+	expectedContext := 120 + 45 + 5 + 2
+	if usage.lastContextTokens != expectedContext {
+		t.Fatalf("expected context tokens %d, got %d", expectedContext, usage.lastContextTokens)
 	}
 }

@@ -630,10 +639,12 @@ func TestUpdateUsageFromTurnResult_recordsWhenInputTokensZero(t *testing.T) {
 	}
 }

-// TestUpdateUsageFromTurnResult_contextTokensUsesInputOnly verifies that context
-// window fill uses InputTokens only (not input+output). The API's InputTokens
-// already includes the full conversation history; adding output would double-count.
-func TestUpdateUsageFromTurnResult_contextTokensUsesInputOnly(t *testing.T) {
+// TestUpdateUsageFromTurnResult_contextTokensUsesAllCategories verifies that
+// context window fill uses all token categories from the final API call:
+// InputTokens + CacheReadTokens + CacheCreationTokens + OutputTokens.
+// With Anthropic prompt caching, InputTokens can be near-zero while
+// CacheReadTokens holds the bulk of the context.
+func TestUpdateUsageFromTurnResult_contextTokensUsesAllCategories(t *testing.T) {
 	usage := &usageUpdaterStub{}
 	app := New(Options{UsageTracker: usage}, nil)
 	defer app.Close()
@@ -641,22 +652,744 @@ func TestUpdateUsageFromTurnResult_contextTokensUsesInputOnly(t *testing.T) {
 	app.updateUsageFromTurnResult(&kit.TurnResult{
 		Response: "ok",
 		TotalUsage: &kit.LLMUsage{
-			InputTokens:  1000,
-			OutputTokens: 200,
+			InputTokens:         3,
+			OutputTokens:        5,
+			CacheReadTokens:     0,
+			CacheCreationTokens: 4317,
 		},
 		FinalUsage: &kit.LLMUsage{
-			InputTokens:  1000, // Full context including history
-			OutputTokens: 200,
+			InputTokens:         3,    // Non-cached input (small with caching)
+			OutputTokens:        5,    // Assistant output
+			CacheReadTokens:     0,    // No cache reads on first call
+			CacheCreationTokens: 4317, // System prompt + tools written to cache
 		},
 	}, "prompt", false)

 	usage.mu.Lock()
 	defer usage.mu.Unlock()

-	// Context tokens should be InputTokens only (1000), not input+output (1200)
-	// because InputTokens already includes the full conversation history
-	if usage.contextCalls != 1 || usage.lastContextTokens != 1000 {
-		t.Fatalf("expected context tokens=1000 (InputTokens only), got calls=%d tokens=%d",
-			usage.contextCalls, usage.lastContextTokens)
+	// Context tokens should be Input + CacheRead + CacheCreate + Output = 4325
+	expected := 3 + 0 + 4317 + 5
+	if usage.contextCalls != 1 || usage.lastContextTokens != expected {
+		t.Fatalf("expected context tokens=%d (all categories), got calls=%d tokens=%d",
+			expected, usage.contextCalls, usage.lastContextTokens)
+	}
+}
+
+// TestHandleTurnEnd_LengthEmitsWarning verifies that when the SDK reports a
+// FinishReasonLength (max_output_tokens hit), the app surfaces a user-visible
+// ExtensionPrintEvent with Level="info" so the TUI can render a banner
+// instead of silently showing a truncated reply.
+func TestHandleTurnEnd_LengthEmitsWarning(t *testing.T) {
+	app := New(Options{}, nil)
+	defer app.Close()
+
+	var mu sync.Mutex
+	var received []tea.Msg
+	sendFn := func(m tea.Msg) {
+		mu.Lock()
+		defer mu.Unlock()
+		received = append(received, m)
+	}
+
+	app.handleTurnEnd(kit.TurnEndEvent{StopReason: kit.FinishReasonLength}, sendFn)
+
+	mu.Lock()
+	defer mu.Unlock()
+	if len(received) != 1 {
+		t.Fatalf("expected 1 event on length stop, got %d", len(received))
+	}
+	ev, ok := received[0].(ExtensionPrintEvent)
+	if !ok {
+		t.Fatalf("expected ExtensionPrintEvent, got %T", received[0])
+	}
+	if ev.Level != "info" {
+		t.Errorf("expected Level=info, got %q", ev.Level)
+	}
+	if ev.Text == "" {
+		t.Error("expected non-empty warning text")
+	}
+	if !strings.Contains(ev.Text, "max_output_tokens") {
+		t.Errorf("warning text should mention max_output_tokens, got: %s", ev.Text)
+	}
+}
+
+// TestHandleTurnEnd_NonLengthIgnored verifies that every non-length stop reason
+// (stop, tool-calls, error, content-filter, other, unknown, "") emits no banner.
+func TestHandleTurnEnd_NonLengthIgnored(t *testing.T) {
+	app := New(Options{}, nil)
+	defer app.Close()
+
+	reasons := []string{
+		kit.FinishReasonStop,
+		kit.FinishReasonToolCalls,
+		kit.FinishReasonError,
+		kit.FinishReasonContentFilter,
+		kit.FinishReasonOther,
+		kit.FinishReasonUnknown,
+		"",
+	}
+	for _, r := range reasons {
+		var called bool
+		app.handleTurnEnd(kit.TurnEndEvent{StopReason: r}, func(m tea.Msg) {
+			called = true
+		})
+		if called {
+			t.Errorf("stop reason %q unexpectedly emitted a warning", r)
+		}
+	}
+}
+
+// TestHandleTurnEnd_NilSendFn guards against panics when no TUI listener is
+// attached (e.g. early init or headless teardown).
+func TestHandleTurnEnd_NilSendFn(t *testing.T) {
+	app := New(Options{}, nil)
+	defer app.Close()
+
+	// Should not panic with a nil sendFn.
+	app.handleTurnEnd(kit.TurnEndEvent{StopReason: kit.FinishReasonLength}, nil)
+}
+
+// TestFormatMaxTokensTruncatedMessage_NoKit verifies the fallback message
+// when Options.Kit is nil (test/stub path).
+func TestFormatMaxTokensTruncatedMessage_NoKit(t *testing.T) {
+	app := New(Options{}, nil)
+	defer app.Close()
+
+	msg := app.formatMaxTokensTruncatedMessage()
+	if msg == "" {
+		t.Fatal("expected non-empty fallback message")
+	}
+	for _, needle := range []string{"max_output_tokens", "--max-tokens", "KIT_MAX_TOKENS", "modelSettings"} {
+		if !strings.Contains(msg, needle) {
+			t.Errorf("fallback message missing %q:\n%s", needle, msg)
+		}
+	}
+}
+
+// --------------------------------------------------------------------------
+// releaseBusyAfterCompact (issue #27)
+// --------------------------------------------------------------------------
+
+// TestReleaseBusyAfterCompact_flushesQueuedMessages is a regression test for
+// issue #27: messages queued via Run() while /compact is running used to sit
+// in a.queue indefinitely until the user typed another prompt. After the fix
+// the deferred releaseBusyAfterCompact tail picks up any pending items and
+// dispatches drainQueue automatically.
+//
+// We simulate the compaction completion path directly (bypassing the SDK)
+// by toggling busy=true, populating the queue exactly as Run() would have
+// during compaction, and then invoking releaseBusyAfterCompact.
+func TestReleaseBusyAfterCompact_flushesQueuedMessages(t *testing.T) {
+	stub := newStubWithFuncs(
+		func(ctx context.Context) (*kit.TurnResult, error) {
+			return turnResult("compacted then drained"), nil
+		},
+	)
+	app := newTestApp(stub)
+	defer app.Close()
+
+	// Simulate the state at the start of the compaction tail: busy is set
+	// and a couple of prompts have piled up in the queue while we were
+	// summarising. (Run() would have appended them and returned a queue
+	// length > 0 to the caller.)
+	app.mu.Lock()
+	app.setBusyLocked(true)
+	app.queue = append(app.queue,
+		queueItem{Prompt: "queued during compact #1"},
+		queueItem{Prompt: "queued during compact #2"},
+	)
+	app.mu.Unlock()
+
+	// Invoke the deferred tail directly. It should kick off drainQueue.
+	app.releaseBusyAfterCompact()
+
+	// drainQueue runs in a goroutine. Wait for the app to come back to idle.
+	ok := waitForCondition(2*time.Second, func() bool {
+		app.mu.Lock()
+		defer app.mu.Unlock()
+		return !app.busy
+	})
+	if !ok {
+		t.Fatal("app did not become idle after releaseBusyAfterCompact: queue not drained")
+	}
+
+	// Wait for any in-flight goroutine to finish before reading state.
+	app.wg.Wait()
+
+	if got := app.QueueLength(); got != 0 {
+		t.Fatalf("expected empty queue after drain, got %d", got)
+	}
+	if n := stub.callCount(); n == 0 {
+		t.Fatalf("expected stub PromptFunc to fire at least once after compact, got %d calls", n)
+	}
+}
+
+// TestReleaseBusyAfterCompact_idleWhenQueueEmpty verifies that with no
+// pending messages the helper just clears busy and does NOT spawn a
+// drainQueue goroutine (no spurious agent turn).
+func TestReleaseBusyAfterCompact_idleWhenQueueEmpty(t *testing.T) {
+	stub := newStub()
+	app := newTestApp(stub)
+	defer app.Close()
+
+	app.mu.Lock()
+	app.setBusyLocked(true)
+	app.mu.Unlock()
+
+	app.releaseBusyAfterCompact()
+
+	app.mu.Lock()
+	busy := app.busy
+	app.mu.Unlock()
+	if busy {
+		t.Fatal("expected busy=false after releaseBusyAfterCompact with empty queue")
+	}
+
+	// Give any rogue goroutine a moment to (incorrectly) call PromptFunc.
+	time.Sleep(50 * time.Millisecond)
+	if n := stub.callCount(); n != 0 {
+		t.Fatalf("expected 0 PromptFunc calls when queue empty, got %d", n)
+	}
+}
+
+// TestReleaseBusyAfterCompact_splicesSteerAheadOfQueue exercises the SDK
+// steer-drain branch of releaseBusyAfterCompact (issue #27 follow-up).
+//
+// Production wires a.opts.Kit.DrainSteer() to pull messages that arrived via
+// Steer/SteerWithFiles during compaction, but Options.Kit is *kit.Kit (a
+// concrete struct) so unit tests cannot stand up a real instance without a
+// full LLM backend. The test uses the unexported steerDrainFn seam to inject
+// fake steer items, then asserts that:
+//
+//   - Steer items are dispatched ahead of any prompts that piled up in
+//     a.queue (steer retains "act now" priority over ordinary queued
+//     prompts), and
+//   - the helper still hands off to drainQueue so the steer item actually
+//     fires (the previous behaviour left them stranded — see #27).
+func TestReleaseBusyAfterCompact_splicesSteerAheadOfQueue(t *testing.T) {
+	var pmu sync.Mutex
+	var firstPrompt string
+	stub := newStubWithFuncs(
+		func(ctx context.Context) (*kit.TurnResult, error) {
+			return turnResult("steer dispatched"), nil
+		},
+	)
+	// Wrap PromptFunc so we can capture the prompt text the stub receives
+	// (newStubWithFuncs's fns ignore prompt; we need it to verify ordering).
+	capturingPrompt := func(ctx context.Context, prompt string) (*kit.TurnResult, error) {
+		pmu.Lock()
+		if firstPrompt == "" {
+			firstPrompt = prompt
+		}
+		pmu.Unlock()
+		return stub.fn(ctx, prompt)
+	}
+	app := New(Options{PromptFunc: capturingPrompt}, nil)
+	defer app.Close()
+
+	// Inject fake steer items via the test seam. In production the same
+	// items would have been delivered through Kit.InjectSteerWithFiles
+	// during /compact and pulled by DrainSteer here.
+	app.steerDrainFn = func() []queueItem {
+		return []queueItem{
+			{Prompt: "steer-1"},
+			{Prompt: "steer-2"},
+		}
+	}
+
+	// Simulate the state at the end of compaction: busy is set and a couple
+	// of regular Run() prompts have piled up after the steer messages.
+	app.mu.Lock()
+	app.setBusyLocked(true)
+	app.queue = append(app.queue,
+		queueItem{Prompt: "queued-1"},
+		queueItem{Prompt: "queued-2"},
+	)
+	app.mu.Unlock()
+
+	app.releaseBusyAfterCompact()
+
+	// Wait for the dispatched batch to complete.
+	ok := waitForCondition(2*time.Second, func() bool {
+		app.mu.Lock()
+		defer app.mu.Unlock()
+		return !app.busy
+	})
+	if !ok {
+		t.Fatal("app did not become idle after steer-spliced releaseBusyAfterCompact")
+	}
+	app.wg.Wait()
+
+	// drainQueue picks up `first` directly and batches the rest. With
+	// PromptFunc set, executeBatch invokes us with items[0] only — that
+	// item must be the first steer message, proving steer items were
+	// spliced ahead of the previously queued prompts.
+	pmu.Lock()
+	got := firstPrompt
+	pmu.Unlock()
+	if got != "steer-1" {
+		t.Fatalf("expected first dispatched prompt to be steer item %q (steer items must come before queued prompts), got %q",
+			"steer-1", got)
+	}
+
+	// Queue should be fully drained and PromptFunc must have actually fired.
+	if n := app.QueueLength(); n != 0 {
+		t.Fatalf("expected empty queue after drain, got %d entries", n)
+	}
+	if n := stub.callCount(); n == 0 {
+		t.Fatal("expected stub PromptFunc to fire at least once after splice")
+	}
+}
+
+// TestReleaseBusyAfterCompact_dropsQueueWhenClosed verifies that if the app
+// was closed during compaction the helper discards any pending items rather
+// than spawning drainQueue against a torn-down App.
+func TestReleaseBusyAfterCompact_dropsQueueWhenClosed(t *testing.T) {
+	stub := newStub()
+	app := newTestApp(stub)
+
+	app.mu.Lock()
+	app.setBusyLocked(true)
+	app.queue = append(app.queue, queueItem{Prompt: "would have run"})
+	app.closed = true
+	app.mu.Unlock()
+
+	app.releaseBusyAfterCompact()
+
+	app.mu.Lock()
+	busy := app.busy
+	qLen := len(app.queue)
+	app.mu.Unlock()
+	if busy {
+		t.Fatal("expected busy=false even when closed")
+	}
+	if qLen != 0 {
+		t.Fatalf("expected queue cleared on closed app, got %d entries", qLen)
+	}
+	time.Sleep(20 * time.Millisecond)
+	if n := stub.callCount(); n != 0 {
+		t.Fatalf("expected 0 PromptFunc calls on closed app, got %d", n)
+	}
+}
+
+// --------------------------------------------------------------------------
+// PopLastUserMessage (/retry building block)
+// --------------------------------------------------------------------------
+
+// TestPopLastUserMessage_NoTreeSession verifies that PopLastUserMessage
+// returns an error when no tree session is active.
+func TestPopLastUserMessage_NoTreeSession(t *testing.T) {
+	app := newTestApp(newStub())
+	defer app.Close()
+
+	prompt, files, err := app.PopLastUserMessage()
+	if err == nil {
+		t.Fatal("expected error when no tree session is active")
+	}
+	if prompt != "" || files != nil {
+		t.Fatalf("expected zero values on error, got prompt=%q files=%v", prompt, files)
+	}
+}
+
+// TestPopLastUserMessage_WhileBusy verifies that PopLastUserMessage
+// refuses to truncate while the agent is busy (would race with executeBatch).
+func TestPopLastUserMessage_WhileBusy(t *testing.T) {
+	app := newTestApp(newStub())
+	defer app.Close()
+
+	app.mu.Lock()
+	app.setBusyLocked(true)
+	app.mu.Unlock()
+
+	_, _, err := app.PopLastUserMessage()
+	if err == nil {
+		t.Fatal("expected error when agent is busy")
+	}
+	if !strings.Contains(err.Error(), "working") {
+		t.Fatalf("expected error mentioning busy/working, got %q", err.Error())
+	}
+}
+
+// TestPopLastUserMessage_WhenClosed verifies that PopLastUserMessage
+// returns an error after Close().
+func TestPopLastUserMessage_WhenClosed(t *testing.T) {
+	app := newTestApp(newStub())
+	app.Close()
+
+	_, _, err := app.PopLastUserMessage()
+	if err == nil {
+		t.Fatal("expected error on closed app")
+	}
+}
+
+// TestPopLastUserMessage_TruncatesAndReturnsPrompt covers the happy path.
+// A real tree session is seeded with two user/assistant exchanges; popping
+// must return the text of the latest user message ("second") with no files
+// and leave only the first exchange on the branch.
+func TestPopLastUserMessage_TruncatesAndReturnsPrompt(t *testing.T) {
+	ts, err := session.CreateTreeSession(t.TempDir())
+	if err != nil {
+		t.Fatalf("create tree session: %v", err)
+	}
+	defer func() { _ = ts.Close() }()
+
+	// assistantText builds an assistant message holding a single text part.
+	assistantText := func(text string) fantasy.Message {
+		return fantasy.Message{
+			Role:    fantasy.MessageRoleAssistant,
+			Content: []fantasy.MessagePart{fantasy.TextPart{Text: text}},
+		}
+	}
+
+	// Seed history: user "first" → assistant "ack 1" → user "second" → assistant "ack 2".
+	history := []fantasy.Message{
+		fantasy.NewUserMessage("first"),
+		assistantText("ack 1"),
+		fantasy.NewUserMessage("second"),
+		assistantText("ack 2"),
+	}
+	for _, m := range history {
+		if _, err := ts.AppendLLMMessage(m); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	app := New(Options{TreeSession: ts, PromptFunc: newStub().fn}, nil)
+	defer app.Close()
+
+	prompt, files, err := app.PopLastUserMessage()
+	switch {
+	case err != nil:
+		t.Fatalf("PopLastUserMessage: %v", err)
+	case prompt != "second":
+		t.Fatalf("expected prompt=%q, got %q", "second", prompt)
+	case files != nil:
+		t.Fatalf("expected no files, got %v", files)
+	}
+
+	// Only the first exchange may remain on the branch; the "second" turn
+	// is orphaned by the truncation.
+	remaining := ts.GetLLMMessages()
+	if len(remaining) != 2 {
+		t.Fatalf("expected 2 messages on truncated branch, got %d", len(remaining))
+	}
+	if got := messageText(remaining[0]); got != "first" {
+		t.Fatalf("expected first message %q, got %q", "first", got)
+	}
+	if got := messageText(remaining[1]); got != "ack 1" {
+		t.Fatalf("expected second message %q, got %q", "ack 1", got)
+	}
+}
+
+// messageText returns the text of every fantasy.TextPart in m.Content,
+// joined in order; parts of any other type are ignored.
+func messageText(m fantasy.Message) string {
+	var sb strings.Builder
+	for _, part := range m.Content {
+		text, isText := part.(fantasy.TextPart)
+		if !isText {
+			continue
+		}
+		sb.WriteString(text.Text)
+	}
+	return sb.String()
+}
+
+// TestPopLastUserMessage_NoUserOnBranch runs PopLastUserMessage against a
+// freshly created tree session with nothing appended and expects an error
+// whose text contains "no user message" instead of a panic.
+func TestPopLastUserMessage_NoUserOnBranch(t *testing.T) {
+	ts, err := session.CreateTreeSession(t.TempDir())
+	if err != nil {
+		t.Fatalf("create tree session: %v", err)
+	}
+	defer func() { _ = ts.Close() }()
+
+	app := New(Options{TreeSession: ts, PromptFunc: newStub().fn}, nil)
+	defer app.Close()
+
+	_, _, popErr := app.PopLastUserMessage()
+	if popErr == nil {
+		t.Fatal("expected error when no user message exists on branch")
+	}
+	if msg := popErr.Error(); !strings.Contains(msg, "no user message") {
+		t.Fatalf("expected error mentioning missing user message, got %q", msg)
+	}
+}
+
+// --------------------------------------------------------------------------
+// WaitForIdle / RequestNewSessionFromExtension (issue #63)
+// --------------------------------------------------------------------------
+
+// TestWaitForIdle_AlreadyIdle verifies the fast path: a freshly constructed
+// App is idle and WaitForIdle returns immediately without consulting the
+// timeout.
+func TestWaitForIdle_AlreadyIdle(t *testing.T) {
+	app := newTestApp(newStub())
+	defer app.Close()
+
+	start := time.Now()
+	if err := app.WaitForIdle(2 * time.Second); err != nil {
+		t.Fatalf("WaitForIdle on idle app: %v", err)
+	}
+	if elapsed := time.Since(start); elapsed > 100*time.Millisecond {
+		t.Fatalf("WaitForIdle blocked for %s on already-idle app", elapsed)
+	}
+}
+
+// TestWaitForIdle_BlocksUntilDrain reproduces the issue #63 race: while
+// drainQueue holds busy==true the call should block, then return nil as soon
+// as the drain completes.
+//
+// The stubbed turn parks on a gate channel, so the test decides when the
+// turn is allowed to finish.
+func TestWaitForIdle_BlocksUntilDrain(t *testing.T) {
+	gate := make(chan struct{})
+	// closeGate is called from both the test body and Cleanup; sync.Once
+	// keeps the second call from closing an already-closed channel.
+	var gateOnce sync.Once
+	closeGate := func() { gateOnce.Do(func() { close(gate) }) }
+	stub := newStubWithFuncs(
+		func(ctx context.Context) (*kit.TurnResult, error) {
+			// Block until the test opens the gate or the context is cancelled.
+			select {
+			case <-gate:
+			case <-ctx.Done():
+				return nil, ctx.Err()
+			}
+			return turnResult("done"), nil
+		},
+	)
+	app := newTestApp(stub)
+	// Open the gate before Close so a still-blocked stub cannot hold it up.
+	t.Cleanup(func() {
+		closeGate()
+		app.Close()
+	})
+
+	app.Run("hello")
+
+	// Confirm the agent is busy before we start waiting.
+	if !waitForCondition(2*time.Second, func() bool { return app.IsBusy() }) {
+		t.Fatal("app never became busy after Run()")
+	}
+
+	// Buffered so the waiter goroutine can always deliver its result and exit.
+	errCh := make(chan error, 1)
+	go func() {
+		errCh <- app.WaitForIdle(5 * time.Second)
+	}()
+
+	// Should not return while the stub is blocked.
+	select {
+	case err := <-errCh:
+		t.Fatalf("WaitForIdle returned early (err=%v) while agent still busy", err)
+	case <-time.After(150 * time.Millisecond):
+	}
+
+	// Let the stubbed turn finish; WaitForIdle is now expected to return nil.
+	closeGate()
+
+	select {
+	case err := <-errCh:
+		if err != nil {
+			t.Fatalf("WaitForIdle: %v", err)
+		}
+	case <-time.After(3 * time.Second):
+		t.Fatal("WaitForIdle did not return after drain completed")
+	}
+
+	if app.IsBusy() {
+		t.Fatal("app still reports busy after WaitForIdle returned")
+	}
+}
+
+// TestWaitForIdle_TimeoutReturnsErrAgentBusy verifies that a slow turn yields
+// ErrAgentBusy (detectable via errors.Is) when the deadline elapses.
+//
+// The gate is never opened in the test body, so the stubbed turn is still
+// blocked when the 50ms WaitForIdle timeout is reached.
+func TestWaitForIdle_TimeoutReturnsErrAgentBusy(t *testing.T) {
+	gate := make(chan struct{})
+	stub := newStubWithFuncs(
+		func(ctx context.Context) (*kit.TurnResult, error) {
+			// Block until the gate is closed or the context is cancelled.
+			select {
+			case <-gate:
+			case <-ctx.Done():
+				return nil, ctx.Err()
+			}
+			return turnResult("done"), nil
+		},
+	)
+	app := newTestApp(stub)
+	// Release the stub before Close so wg.Wait() can return.
+	// The gate is closed only here, so no sync.Once guard is needed.
+	t.Cleanup(func() {
+		close(gate)
+		app.Close()
+	})
+
+	app.Run("hello")
+	if !waitForCondition(2*time.Second, func() bool { return app.IsBusy() }) {
+		t.Fatal("app never became busy after Run()")
+	}
+
+	err := app.WaitForIdle(50 * time.Millisecond)
+	if !errors.Is(err, ErrAgentBusy) {
+		t.Fatalf("expected ErrAgentBusy on timeout, got %v", err)
+	}
+}
+
+// TestWaitForIdle_ZeroTimeoutWaitsIndefinitely verifies that a non-positive
+// timeout still blocks until idle (or shutdown) — not an instant ErrAgentBusy.
+//
+// Only WaitForIdle(0) is exercised here; the stubbed turn is held on a gate
+// channel so the test controls when the app goes idle.
+func TestWaitForIdle_ZeroTimeoutWaitsIndefinitely(t *testing.T) {
+	gate := make(chan struct{})
+	// closeGate is called from both the test body and Cleanup; sync.Once
+	// keeps the second call from closing an already-closed channel.
+	var gateOnce sync.Once
+	closeGate := func() { gateOnce.Do(func() { close(gate) }) }
+	stub := newStubWithFuncs(
+		func(ctx context.Context) (*kit.TurnResult, error) {
+			// Block until the test opens the gate or the context is cancelled.
+			select {
+			case <-gate:
+			case <-ctx.Done():
+				return nil, ctx.Err()
+			}
+			return turnResult("done"), nil
+		},
+	)
+	app := newTestApp(stub)
+	t.Cleanup(func() {
+		closeGate()
+		app.Close()
+	})
+
+	app.Run("hello")
+	if !waitForCondition(2*time.Second, func() bool { return app.IsBusy() }) {
+		t.Fatal("app never became busy after Run()")
+	}
+
+	// Buffered so the waiter goroutine can always deliver its result and exit.
+	errCh := make(chan error, 1)
+	go func() { errCh <- app.WaitForIdle(0) }()
+
+	// While the stub is blocked, WaitForIdle(0) must not have returned.
+	select {
+	case err := <-errCh:
+		t.Fatalf("WaitForIdle(0) returned early with %v while agent was busy", err)
+	case <-time.After(150 * time.Millisecond):
+	}
+
+	// Let the stubbed turn finish; the waiter is now expected to get nil.
+	closeGate()
+
+	select {
+	case err := <-errCh:
+		if err != nil {
+			t.Fatalf("WaitForIdle(0) returned %v after idle", err)
+		}
+	case <-time.After(3 * time.Second):
+		t.Fatal("WaitForIdle(0) did not return after drain completed")
+	}
+}
+
+// TestWaitForIdle_AppClose verifies that shutting down the app while a
+// caller is blocked in WaitForIdle releases the wait.
+//
+// Unlike the sibling tests there is no Cleanup hook: Close is the action
+// under test and is called explicitly in the body.
+func TestWaitForIdle_AppClose(t *testing.T) {
+	gate := make(chan struct{})
+	stub := newStubWithFuncs(
+		func(ctx context.Context) (*kit.TurnResult, error) {
+			// Block until the gate is closed or the context is cancelled.
+			select {
+			case <-gate:
+			case <-ctx.Done():
+				return nil, ctx.Err()
+			}
+			return turnResult("done"), nil
+		},
+	)
+	app := newTestApp(stub)
+
+	app.Run("hello")
+	if !waitForCondition(2*time.Second, func() bool { return app.IsBusy() }) {
+		t.Fatal("app never became busy after Run()")
+	}
+
+	// Buffered so the waiter goroutine can always deliver its result and exit.
+	errCh := make(chan error, 1)
+	go func() { errCh <- app.WaitForIdle(5 * time.Second) }()
+
+	// Give the goroutine a moment to enter the wait.
+	time.Sleep(50 * time.Millisecond)
+
+	// rootCancel is called by Close, which should release the waiter
+	// before drainQueue itself observes the cancellation and clears busy.
+	go func() {
+		// Unblock the stub so Close() can proceed past wg.Wait().
+		close(gate)
+	}()
+	app.Close()
+
+	select {
+	case err := <-errCh:
+		// Either rootCtx cancellation propagated first (err = context.Canceled)
+		// or the drain finished cleanly first (err == nil); both are
+		// acceptable terminations. The key invariant is that WaitForIdle
+		// does not hang past Close.
+		if err != nil && !errors.Is(err, context.Canceled) {
+			t.Fatalf("WaitForIdle returned unexpected error: %v", err)
+		}
+	case <-time.After(3 * time.Second):
+		t.Fatal("WaitForIdle did not return after Close()")
+	}
+}
+
+// TestRequestNewSessionFromExtension_NoTUI covers the headless guard: on a
+// test app with no Bubble Tea program registered, the request must fail
+// fast (no busy-wait) with an error containing "no interactive TUI".
+func TestRequestNewSessionFromExtension_NoTUI(t *testing.T) {
+	app := newTestApp(newStub())
+	defer app.Close()
+
+	reqErr := app.RequestNewSessionFromExtension("hello")
+	if reqErr == nil {
+		t.Fatal("expected error in headless mode")
+	}
+	if msg := reqErr.Error(); !strings.Contains(msg, "no interactive TUI") {
+		t.Fatalf("expected 'no interactive TUI' error, got %q", msg)
+	}
+}
+
+// TestBusyTransitionsSignalIdleCh exercises the setBusyLocked invariants
+// directly: a fresh App is idle (closed channel); Run() opens a new channel
+// that is then closed when drainQueue exits.
+//
+// Two apps are used: app only for the initial-state check, and app2 (built
+// on a gated stub) for the busy-to-idle transition.
+func TestBusyTransitionsSignalIdleCh(t *testing.T) {
+	app := newTestApp(newStub("ok"))
+	defer app.Close()
+
+	// Initial state: closed channel, busy==false.
+	busy, ch := app.idleSnapshot()
+	if busy {
+		t.Fatal("freshly constructed App should not be busy")
+	}
+	// A receive that succeeds without blocking shows the channel is closed.
+	select {
+	case <-ch:
+	default:
+		t.Fatal("initial idleCh should already be closed")
+	}
+
+	gate := make(chan struct{})
+	// closeGate is called from both the test body and Cleanup; sync.Once
+	// keeps the second call from closing an already-closed channel.
+	var gateOnce sync.Once
+	closeGate := func() { gateOnce.Do(func() { close(gate) }) }
+	stub := newStubWithFuncs(func(ctx context.Context) (*kit.TurnResult, error) {
+		// Block until the test opens the gate or the context is cancelled.
+		select {
+		case <-gate:
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		}
+		return turnResult("ok"), nil
+	})
+	app2 := newTestApp(stub)
+	t.Cleanup(func() {
+		closeGate()
+		app2.Close()
+	})
+
+	app2.Run("hello")
+	if !waitForCondition(2*time.Second, func() bool { return app2.IsBusy() }) {
+		t.Fatal("app2 never became busy")
+	}
+
+	// Snapshot the channel while busy; it must still be open (receive blocks).
+	_, ch2 := app2.idleSnapshot()
+	select {
+	case <-ch2:
+		t.Fatal("idleCh should be open while busy")
+	default:
+	}
+
+	closeGate()
+
+	// The same channel captured above must be closed once the drain finishes.
+	select {
+	case <-ch2:
+	case <-time.After(3 * time.Second):
+		t.Fatal("idleCh was never closed after drain completed")
 	}
 }
@@ -32,6 +32,36 @@ type ToolCallStartedEvent struct {
 	ToolArgs string
 }

+// ToolCallInputStartEvent is sent when the LLM begins generating tool call
+// arguments. The tool name is known but the full argument JSON is still being
+// streamed. UIs can use this to show a "running" indicator immediately instead
+// of waiting for the full argument JSON to finish streaming.
+//
+// The event carries no argument data itself, only the identifying fields
+// below.
+type ToolCallInputStartEvent struct {
+	// ToolCallID is the stable identifier for correlating tool lifecycle events.
+	ToolCallID string
+	// ToolName is the name of the tool being called.
+	ToolName string
+	// ToolKind classifies the tool: "execute", "edit", "read", "search", "agent".
+	ToolKind string
+}
+
+// ToolCallInputDeltaEvent is sent for each streamed fragment of tool call
+// arguments as they arrive from the LLM. Useful for live-previewing content
+// or showing a progress indicator with byte count.
+type ToolCallInputDeltaEvent struct {
+	// ToolCallID is the stable identifier for correlating tool lifecycle events.
+	ToolCallID string
+	// Delta is a JSON fragment of tool call arguments. A single fragment is
+	// not expected to be valid JSON on its own.
+	// NOTE(review): presumably the fragments concatenate, in arrival order,
+	// into the full argument JSON — confirm against the emitter.
+	Delta string
+}
+
+// ToolCallInputEndEvent is sent when tool argument streaming is complete,
+// before the tool call is parsed and execution begins.
+//
+// It carries only the call identifier; no argument data is attached.
+type ToolCallInputEndEvent struct {
+	// ToolCallID is the stable identifier for correlating tool lifecycle events.
+	ToolCallID string
+}
+
 // ToolExecutionEvent is sent when a tool starts or finishes executing.
 // The IsStarting flag distinguishes between the start and end of execution.
 type ToolExecutionEvent struct {
@@ -79,6 +109,24 @@ type ToolCallContentEvent struct {
 	Content string
 }

+// PasswordPromptEvent is sent when a sudo command needs a password.
+// The TUI should display a password prompt overlay and send the result back.
+type PasswordPromptEvent struct {
+	// Prompt is the message to display to the user.
+	Prompt string
+	// ResponseCh receives the password from the TUI.
+	// The TUI must send exactly one value.
+	// The channel is send-only from the receiver's side of this event: the
+	// TUI can write the PasswordPromptResponse but cannot read from it.
+	ResponseCh chan<- PasswordPromptResponse
+}
+
+// PasswordPromptResponse carries the user's password input.
+// It is the value type sent over PasswordPromptEvent.ResponseCh.
+type PasswordPromptResponse struct {
+	// Password is the entered password.
+	// NOTE(review): presumably empty when Cancelled is true — confirm with the TUI.
+	Password string
+	// Cancelled is true if the user cancelled the prompt.
+	Cancelled bool
+}
+
 // ResponseCompleteEvent is sent when the LLM produces a final (non-streaming) response.
 // In streaming mode, this may be empty if all content was delivered via StreamChunkEvents.
 type ResponseCompleteEvent struct {
@@ -162,6 +210,12 @@ type ModelChangedEvent struct {
 	ModelName string
 }

+// UsageUpdatedEvent is sent after each completed LLM step to notify the TUI
+// that token counts and costs have changed. The UsageTracker is updated
+// in-place before this event is sent; the TUI just needs to re-render to
+// reflect the new values in the status bar.
+//
+// The event is a pure signal and has no fields.
+type UsageUpdatedEvent struct{}
+
 // WidgetUpdateEvent is sent when an extension adds, updates, or removes a
 // widget via ctx.SetWidget or ctx.RemoveWidget. The TUI re-reads widget state
 // from its WidgetProvider on the next render cycle.
@@ -172,6 +226,20 @@ type WidgetUpdateEvent struct{}
 // its autocomplete entries and internal state from the provider callbacks.
 type ContentReloadEvent struct{}

+// MCPToolsReadyEvent is sent when background MCP tool loading completes.
+// The TUI refreshes its tool names and MCP tool count from provider callbacks
+// so that /tools and the startup info bar reflect the loaded MCP tools.
+//
+// The event is a pure signal and has no fields.
+type MCPToolsReadyEvent struct{}
+
+// MCPServerLoadedEvent is sent when a single MCP server finishes loading
+// (successfully or with error). The TUI displays a system message so users
+// see real-time progress as each server initializes.
+type MCPServerLoadedEvent struct {
+	// ServerName identifies the MCP server that finished loading.
+	ServerName string
+	// ToolCount is a tool count for this server.
+	// NOTE(review): presumably the number of tools the server provided, and
+	// zero when Error is set — confirm against the loader.
+	ToolCount  int
+	// Error holds the load failure, if any.
+	Error      error // nil on success
+}
+
 // EditorTextSetEvent is sent when an extension calls ctx.SetEditorText to
 // pre-fill the input editor with text. The TUI handles this by setting the
 // textarea content and moving the cursor to the end.
@@ -179,6 +247,21 @@ type EditorTextSetEvent struct {
 	Text string
 }

+// NewSessionRequestEvent is sent when an extension calls ctx.NewSession to
+// end the current session and start a fresh one. The TUI routes this into
+// the same /new code path (including the BeforeSessionSwitch hook and any
+// @file expansion in InitialPrompt). ResponseCh, when non-nil, receives a
+// single result so the extension goroutine can observe success or failure.
+type NewSessionRequestEvent struct {
+	// InitialPrompt, when non-empty, is the first user turn to submit
+	// after the session switch. @file references are expanded.
+	InitialPrompt string
+	// ResponseCh receives the outcome (nil error on success). Must be
+	// buffered (cap >= 1) so the TUI never blocks. May be nil if the
+	// caller does not need the result.
+	// The channel type is send-only, so the TUI side can write the result
+	// but not read from it.
+	ResponseCh chan<- error
+}
+
 // ExtensionPrintEvent is sent when an extension calls ctx.Print, ctx.PrintInfo,
 // ctx.PrintError, or ctx.PrintBlock. The TUI renders it via the appropriate
 // renderer and tea.Println (scrollback); the CLI handler uses
@@ -13,11 +13,6 @@ type MessageStore struct {
 	messages []kit.LLMMessage
 }

-// NewMessageStore creates an empty MessageStore.
-func NewMessageStore() *MessageStore {
-	return &MessageStore{}
-}
-
 // NewMessageStoreWithMessages creates a MessageStore pre-populated with the
 // given messages. This is used when loading an existing session at startup.
 func NewMessageStoreWithMessages(msgs []kit.LLMMessage) *MessageStore {
@@ -3,24 +3,21 @@ package app
 import (
 	"testing"

-	"charm.land/fantasy"
-
 	kit "github.com/mark3labs/kit/pkg/kit"
 )

-// makeTextMsg builds a minimal kit.LLMMessage using fantasy.NewUserMessage
-// or constructing with the given role.
+// makeTextMsg builds a minimal kit.LLMMessage with the given role and text.
 func makeTextMsg(role, text string) kit.LLMMessage {
 	return kit.LLMMessage{
 		Role:    kit.LLMMessageRole(role),
-		Content: []fantasy.MessagePart{fantasy.TextPart{Text: text}},
+		Content: []kit.LLMMessagePart{kit.LLMTextPart{Text: text}},
 	}
 }

 // textOf extracts the plain text from an LLMMessage for assertions.
 func textOf(msg kit.LLMMessage) string {
 	for _, part := range msg.Content {
-		if tp, ok := part.(fantasy.TextPart); ok {
+		if tp, ok := part.(kit.LLMTextPart); ok {
 			return tp.Text
 		}
 	}
@@ -32,7 +29,7 @@ func textOf(msg kit.LLMMessage) string {
 // --------------------------------------------------------------------------

 func TestNewMessageStore_empty(t *testing.T) {
-	s := NewMessageStore()
+	s := NewMessageStoreWithMessages(nil)
 	if s == nil {
 		t.Fatal("expected non-nil store")
 	}
@@ -75,7 +72,7 @@ func TestNewMessageStoreWithMessages_isolatesInput(t *testing.T) {
 // --------------------------------------------------------------------------

 func TestAdd_appendsMessage(t *testing.T) {
-	s := NewMessageStore()
+	s := NewMessageStoreWithMessages(nil)
 	s.Add(makeTextMsg("user", "first"))
 	s.Add(makeTextMsg("assistant", "second"))

@@ -85,7 +82,7 @@ func TestAdd_appendsMessage(t *testing.T) {
 }

 func TestAdd_preservesOrder(t *testing.T) {
-	s := NewMessageStore()
+	s := NewMessageStoreWithMessages(nil)
 	texts := []string{"a", "b", "c"}
 	for _, t2 := range texts {
 		s.Add(makeTextMsg("user", t2))
@@ -103,7 +100,7 @@ func TestAdd_preservesOrder(t *testing.T) {
 // --------------------------------------------------------------------------

 func TestReplace_swapsHistory(t *testing.T) {
-	s := NewMessageStore()
+	s := NewMessageStoreWithMessages(nil)
 	s.Add(makeTextMsg("user", "old"))

 	replacement := []kit.LLMMessage{
@@ -123,7 +120,7 @@ func TestReplace_swapsHistory(t *testing.T) {

 // Replace must deep-copy the incoming slice.
 func TestReplace_isolatesInput(t *testing.T) {
-	s := NewMessageStore()
+	s := NewMessageStoreWithMessages(nil)
 	replacement := []kit.LLMMessage{makeTextMsg("user", "original")}
 	s.Replace(replacement)

@@ -140,7 +137,7 @@ func TestReplace_isolatesInput(t *testing.T) {
 // --------------------------------------------------------------------------

 func TestGetAll_returnsCopy(t *testing.T) {
-	s := NewMessageStore()
+	s := NewMessageStoreWithMessages(nil)
 	s.Add(makeTextMsg("user", "hello"))

 	got := s.GetAll()
@@ -154,7 +151,7 @@ func TestGetAll_returnsCopy(t *testing.T) {
 }

 func TestGetAll_emptyStore(t *testing.T) {
-	s := NewMessageStore()
+	s := NewMessageStoreWithMessages(nil)
 	got := s.GetAll()
 	if len(got) != 0 {
 		t.Fatalf("expected empty slice, got %d elements", len(got))
@@ -166,7 +163,7 @@ func TestGetAll_emptyStore(t *testing.T) {
 // --------------------------------------------------------------------------

 func TestClear_removesAllMessages(t *testing.T) {
-	s := NewMessageStore()
+	s := NewMessageStoreWithMessages(nil)
 	s.Add(makeTextMsg("user", "a"))
 	s.Add(makeTextMsg("user", "b"))
 	s.Clear()
@@ -177,7 +174,7 @@ func TestClear_removesAllMessages(t *testing.T) {
 }

 func TestClear_allowsSubsequentAdds(t *testing.T) {
-	s := NewMessageStore()
+	s := NewMessageStoreWithMessages(nil)
 	s.Add(makeTextMsg("user", "before"))
 	s.Clear()
 	s.Add(makeTextMsg("user", "after"))
@@ -196,7 +193,7 @@ func TestClear_allowsSubsequentAdds(t *testing.T) {
 // --------------------------------------------------------------------------

 func TestConcurrentAccess(t *testing.T) {
-	s := NewMessageStore()
+	s := NewMessageStoreWithMessages(nil)
 	done := make(chan struct{})

 	// Writer goroutine.
@@ -21,8 +21,10 @@ type UsageUpdater interface {
 	// the provider does not return exact counts.
 	EstimateAndUpdateUsage(inputText, outputText string)
 	// SetContextTokens records the approximate current context window fill
-	// level. This should be the final API call's input+output tokens (from
-	// FinalResponse.Usage), NOT the aggregate TotalUsage.
+	// level. This should be the sum of ALL token categories from the last
+	// API call: InputTokens + CacheReadTokens + CacheCreationTokens +
+	// OutputTokens. With Anthropic prompt caching, InputTokens can be
+	// near-zero while CacheReadTokens holds the bulk of the context.
 	SetContextTokens(tokens int)
 }

@@ -1,6 +1,7 @@
 package auth

 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"os"
@@ -9,11 +10,11 @@ import (
 	"time"
 )

-// CredentialStore holds all stored credentials for various providers.
-// Currently supports Anthropic and OpenAI credentials with both OAuth and API key authentication methods.
+// CredentialStore holds stored credentials for Anthropic, OpenAI, and GitHub Copilot.
 type CredentialStore struct {
 	Anthropic *AnthropicCredentials `json:"anthropic,omitempty"`
 	OpenAI    *OpenAICredentials    `json:"openai,omitempty"`
+	Copilot   *CopilotCredentials   `json:"copilot,omitempty"`
 }

 // AnthropicCredentials holds Anthropic API credentials supporting both OAuth
@@ -43,6 +44,16 @@ type OpenAICredentials struct {
 	CreatedAt    time.Time `json:"created_at"`
 }

+// CopilotCredentials holds GitHub OAuth credentials and the short-lived
+// GitHub Copilot API token derived from them.
+//
+// Every field except Type and CreatedAt is omitted from the JSON encoding
+// when it holds its zero value.
+type CopilotCredentials struct {
+	Type               string    `json:"type"`                           // "oauth"
+	GitHubToken        string    `json:"github_token,omitempty"`         // GitHub device-flow OAuth token
+	CopilotAccessToken string    `json:"copilot_access_token,omitempty"` // Short-lived Copilot API token
+	ExpiresAt          int64     `json:"expires_at,omitempty"`           // Copilot token expiry (Unix timestamp)
+	CreatedAt          time.Time `json:"created_at"`
+}
+
 // oauthTokenExpired reports whether an OAuth token with the given type and
 // expiry unix timestamp is past its expiry. Returns false for API key
 // credentials or when no expiry is set.
@@ -91,6 +102,16 @@ func (c *OpenAICredentials) NeedsRefresh() bool {
 	return oauthTokenNeedsRefresh(c.Type, c.ExpiresAt)
 }

+// IsExpired reports whether the Copilot API token is past its expiry, as
+// decided by oauthTokenExpired for this credential's Type and ExpiresAt.
+func (c *CopilotCredentials) IsExpired() bool {
+	kind, expiry := c.Type, c.ExpiresAt
+	return oauthTokenExpired(kind, expiry)
+}
+
+// NeedsRefresh reports whether the Copilot API token should be renewed, as
+// decided by oauthTokenNeedsRefresh for this credential's Type and ExpiresAt.
+func (c *CopilotCredentials) NeedsRefresh() bool {
+	kind, expiry := c.Type, c.ExpiresAt
+	return oauthTokenNeedsRefresh(kind, expiry)
+}
+
 // CredentialManager handles secure storage and retrieval of authentication credentials.
 // It manages a JSON file stored in the user's config directory with appropriate
 // file permissions for security.
@@ -222,7 +243,7 @@ func (cm *CredentialManager) RemoveAnthropicCredentials() error {
 	store.Anthropic = nil

 	// If store is empty, remove the file entirely
-	if store.Anthropic == nil {
+	if store.Anthropic == nil && store.OpenAI == nil && store.Copilot == nil {
 		if err := os.Remove(cm.credentialsPath); err != nil && !os.IsNotExist(err) {
 			return fmt.Errorf("failed to remove credentials file: %w", err)
 		}
@@ -255,29 +276,6 @@ func (cm *CredentialManager) HasAnthropicCredentials() (bool, error) {
 	}
 }

-// SetOpenAICredentials stores OpenAI API key credentials. It validates the
-// API key format before storing. The API key must start with "sk-" and be
-// at least 20 characters long. Returns an error if the API key is invalid or
-// if storage fails.
-func (cm *CredentialManager) SetOpenAICredentials(apiKey string) error {
-	if err := validateOpenAIAPIKey(apiKey); err != nil {
-		return err
-	}
-
-	store, err := cm.LoadCredentials()
-	if err != nil {
-		return err
-	}
-
-	store.OpenAI = &OpenAICredentials{
-		Type:      "api_key",
-		APIKey:    apiKey,
-		CreatedAt: time.Now(),
-	}
-
-	return cm.SaveCredentials(store)
-}
-
 // GetOpenAICredentials retrieves stored OpenAI credentials. Returns nil if
 // no credentials are stored. The returned credentials may be either OAuth or API
 // key type, check the Type field to determine which.
@@ -302,7 +300,7 @@ func (cm *CredentialManager) RemoveOpenAICredentials() error {
 	store.OpenAI = nil

 	// If store is empty, remove the file entirely
-	if store.Anthropic == nil && store.OpenAI == nil {
+	if store.Anthropic == nil && store.OpenAI == nil && store.Copilot == nil {
 		if err := os.Remove(cm.credentialsPath); err != nil && !os.IsNotExist(err) {
 			return fmt.Errorf("failed to remove credentials file: %w", err)
 		}
@@ -312,6 +310,104 @@ func (cm *CredentialManager) RemoveOpenAICredentials() error {
 	return cm.SaveCredentials(store)
 }

+// GetCopilotCredentials loads the credential store and returns its Copilot
+// entry. The returned pointer is nil when the store holds no Copilot entry.
+// Any error from loading the store is returned unchanged.
+func (cm *CredentialManager) GetCopilotCredentials() (*CopilotCredentials, error) {
+	store, err := cm.LoadCredentials()
+	if err != nil {
+		return nil, err
+	}
+
+	copilot := store.Copilot
+	return copilot, nil
+}
+
+// RemoveCopilotCredentials clears the Copilot entry from the credential
+// store. If another provider still has credentials, the store is saved back;
+// otherwise the credentials file is removed (a file that is already missing
+// is not an error).
+func (cm *CredentialManager) RemoveCopilotCredentials() error {
+	store, err := cm.LoadCredentials()
+	if err != nil {
+		return err
+	}
+
+	store.Copilot = nil
+
+	// Something is still stored: persist the reduced store.
+	if store.Anthropic != nil || store.OpenAI != nil || store.Copilot != nil {
+		return cm.SaveCredentials(store)
+	}
+
+	// Nothing left: delete the file instead of writing an empty store.
+	rmErr := os.Remove(cm.credentialsPath)
+	if rmErr != nil && !os.IsNotExist(rmErr) {
+		return fmt.Errorf("failed to remove credentials file: %w", rmErr)
+	}
+	return nil
+}
+
+// HasCopilotCredentials reports whether usable GitHub Copilot credentials
+// are stored: an entry of type "oauth" with a non-empty GitHub token.
+// It returns an error only when the credentials cannot be loaded.
+func (cm *CredentialManager) HasCopilotCredentials() (bool, error) {
+	creds, err := cm.GetCopilotCredentials()
+	switch {
+	case err != nil:
+		return false, err
+	case creds == nil:
+		return false, nil
+	}
+
+	usable := creds.Type == "oauth" && creds.GitHubToken != ""
+	return usable, nil
+}
+
+// SetCopilotOAuthCredentials replaces the Copilot entry of the credential
+// store with creds and saves the store. Entries for other providers are
+// left as loaded.
+func (cm *CredentialManager) SetCopilotOAuthCredentials(creds *CopilotCredentials) error {
+	store, loadErr := cm.LoadCredentials()
+	if loadErr != nil {
+		return loadErr
+	}
+
+	store.Copilot = creds
+
+	return cm.SaveCredentials(store)
+}
+
+// GetValidCopilotAccessToken returns a usable Copilot API token, renewing it
+// with the stored GitHub OAuth token when needed. It is shorthand for
+// GetValidCopilotAccessTokenContext with context.Background().
+func (cm *CredentialManager) GetValidCopilotAccessToken() (string, error) {
+	ctx := context.Background()
+	return cm.GetValidCopilotAccessTokenContext(ctx)
+}
+
+// GetValidCopilotAccessTokenContext is the context-aware form of
+// GetValidCopilotAccessToken. A nil ctx is replaced with context.Background().
+//
+// The stored credentials must exist, be of type "oauth", and carry a GitHub
+// OAuth token; otherwise an error is returned. When the cached Copilot token
+// is empty or NeedsRefresh reports true, a new token is requested through
+// RefreshCopilotToken, saved (keeping the stored CreatedAt), and returned.
+// Otherwise the cached token is returned unchanged.
+func (cm *CredentialManager) GetValidCopilotAccessTokenContext(ctx context.Context) (string, error) {
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	stored, err := cm.GetCopilotCredentials()
+	switch {
+	case err != nil:
+		return "", err
+	case stored == nil:
+		return "", fmt.Errorf("no Copilot credentials found")
+	case stored.Type != "oauth":
+		return "", fmt.Errorf("unknown credential type: %s", stored.Type)
+	case stored.GitHubToken == "":
+		return "", fmt.Errorf("GitHub OAuth token missing from Copilot credentials")
+	}
+
+	// Fast path: the cached token is present and not due for renewal.
+	if stored.CopilotAccessToken != "" && !stored.NeedsRefresh() {
+		return stored.CopilotAccessToken, nil
+	}
+
+	refreshed, err := NewCopilotOAuthClient().RefreshCopilotToken(ctx, stored.GitHubToken)
+	if err != nil {
+		return "", fmt.Errorf("failed to refresh Copilot token: %w", err)
+	}
+	// Carry over the CreatedAt of the stored credentials.
+	refreshed.CreatedAt = stored.CreatedAt
+
+	if err := cm.SetCopilotOAuthCredentials(refreshed); err != nil {
+		return "", fmt.Errorf("failed to save refreshed Copilot token: %w", err)
+	}
+
+	return refreshed.CopilotAccessToken, nil
+}
+
 // HasOpenAICredentials checks if valid OpenAI credentials are stored.
 // Returns true if either a non-empty OAuth access token or API key is present,
 // false otherwise. Returns an error if credentials cannot be loaded.
@@ -417,24 +513,18 @@ func validateAnthropicAPIKey(apiKey string) error {
 	return nil
 }

-// validateOpenAIAPIKey validates the format of an OpenAI API key
-func validateOpenAIAPIKey(apiKey string) error {
-	apiKey = strings.TrimSpace(apiKey)
+// CredentialSourceOAuth is the source description returned by
+// GetAnthropicAPIKey when the key resolves to stored OAuth credentials.
+// Consumers should compare against this constant (or use IsAnthropicOAuth)
+// rather than matching the string literal.
+const CredentialSourceOAuth = "stored OAuth credentials"

-	if apiKey == "" {
-		return fmt.Errorf("API key cannot be empty")
-	}
-
-	// OpenAI API keys typically start with "sk-" and are quite long
-	if !strings.HasPrefix(apiKey, "sk-") {
-		return fmt.Errorf("invalid OpenAI API key format (should start with 'sk-')")
-	}
-
-	if len(apiKey) < 20 {
-		return fmt.Errorf("API key appears to be too short")
-	}
-
-	return nil
+// IsAnthropicOAuth reports whether the active Anthropic credential resolves
+// to a stored OAuth token (in which case the user is not billed per-token).
+// flagValue is the --provider-api-key flag value (may be empty). Any error
+// from GetAnthropicAPIKey is reported as false.
+func IsAnthropicOAuth(flagValue string) bool {
+	_, source, err := GetAnthropicAPIKey(flagValue)
+	if err != nil {
+		return false
+	}
+	return source == CredentialSourceOAuth
 }

 // GetAnthropicAPIKey retrieves an Anthropic API key from multiple sources in priority order:
@@ -459,7 +549,7 @@ func GetAnthropicAPIKey(flagValue string) (string, string, error) {
 				if err != nil {
 					return "", "", fmt.Errorf("failed to get valid OAuth token: %w", err)
 				}
-				return token, "stored OAuth credentials", nil
+				return token, CredentialSourceOAuth, nil
 			} else if creds.Type == "api_key" && creds.APIKey != "" {
 				return creds.APIKey, "stored API key", nil
 			}
@@ -471,5 +561,13 @@ func GetAnthropicAPIKey(flagValue string) (string, string, error) {
 		return envKey, "ANTHROPIC_API_KEY environment variable", nil
 	}

+	// Check if OpenAI credentials exist to provide a helpful suggestion
+	if cm != nil {
+		hasOpenAI, _ := cm.HasOpenAICredentials()
+		if hasOpenAI {
+			return "", "", fmt.Errorf("no Anthropic API key found. Use 'kit auth login anthropic', set ANTHROPIC_API_KEY environment variable, or use --provider-api-key flag\n\nNote: OpenAI credentials were detected. To use OpenAI, run with --model openai/gpt-5.4 or set it as default:\n  kit auth login openai --set-default")
+		}
+	}
+
 	return "", "", fmt.Errorf("no Anthropic API key found. Use 'kit auth login anthropic', set ANTHROPIC_API_KEY environment variable, or use --provider-api-key flag")
 }
@@ -4,6 +4,7 @@ import (
 	"os"
 	"path/filepath"
 	"testing"
+	"time"
 )

 func TestCredentialManager(t *testing.T) {
@@ -215,6 +216,7 @@ func TestCredentialStorePersistence(t *testing.T) {
 	if err != nil {
 		t.Fatalf("Failed to create temp dir: %v", err)
 	}
+
 	defer func() { _ = os.RemoveAll(tempDir) }()

 	credentialsPath := filepath.Join(tempDir, "credentials.json")
@@ -252,3 +254,98 @@ func TestCredentialStorePersistence(t *testing.T) {
 		t.Errorf("Expected file permissions 0600, got %v", info.Mode().Perm())
 	}
 }
+
+// TestCopilotCredentials walks the Copilot credential lifecycle against a
+// fresh credentials file: store, detect, read back the access token, remove,
+// and confirm that nothing is left afterwards.
+func TestCopilotCredentials(t *testing.T) {
+	// t.TempDir supplies a unique directory that the testing package removes
+	// when the test ends, failing the test if the cleanup itself fails.
+	cm := &CredentialManager{
+		credentialsPath: filepath.Join(t.TempDir(), "credentials.json"),
+	}
+
+	creds := &CopilotCredentials{
+		Type:               "oauth",
+		GitHubToken:        "github-token",
+		CopilotAccessToken: "copilot-token",
+		ExpiresAt:          time.Now().Add(time.Hour).Unix(),
+		CreatedAt:          time.Now(),
+	}
+
+	if err := cm.SetCopilotOAuthCredentials(creds); err != nil {
+		t.Fatalf("SetCopilotOAuthCredentials failed: %v", err)
+	}
+
+	hasAuth, err := cm.HasCopilotCredentials()
+	if err != nil {
+		t.Fatalf("HasCopilotCredentials failed: %v", err)
+	}
+	if !hasAuth {
+		t.Fatal("Expected Copilot credentials")
+	}
+
+	// The stored token expires an hour from now, so the test expects the
+	// stored access token back unchanged.
+	token, err := cm.GetValidCopilotAccessToken()
+	if err != nil {
+		t.Fatalf("GetValidCopilotAccessToken failed: %v", err)
+	}
+	if token != creds.CopilotAccessToken {
+		t.Fatalf("Expected Copilot token %q, got %q", creds.CopilotAccessToken, token)
+	}
+
+	if err := cm.RemoveCopilotCredentials(); err != nil {
+		t.Fatalf("RemoveCopilotCredentials failed: %v", err)
+	}
+	hasAuth, err = cm.HasCopilotCredentials()
+	if err != nil {
+		t.Fatalf("HasCopilotCredentials after removal failed: %v", err)
+	}
+	if hasAuth {
+		t.Fatal("Expected no Copilot credentials after removal")
+	}
+}
+
+// TestRemoveCredentialsPreservesOtherProviders stores OpenAI and Copilot
+// credentials in the same credentials file, removes only the Copilot entry,
+// and checks that the OpenAI entry is still reported as present.
+func TestRemoveCredentialsPreservesOtherProviders(t *testing.T) {
+	// t.TempDir supplies a unique directory that the testing package removes
+	// when the test ends, failing the test if the cleanup itself fails.
+	cm := &CredentialManager{
+		credentialsPath: filepath.Join(t.TempDir(), "credentials.json"),
+	}
+
+	if err := cm.SetOpenAIOAuthCredentials(&OpenAICredentials{
+		Type:         "oauth",
+		AccessToken:  "openai-token",
+		RefreshToken: "refresh-token",
+		ExpiresAt:    time.Now().Add(time.Hour).Unix(),
+		AccountID:    "account",
+		CreatedAt:    time.Now(),
+	}); err != nil {
+		t.Fatalf("SetOpenAIOAuthCredentials failed: %v", err)
+	}
+	if err := cm.SetCopilotOAuthCredentials(&CopilotCredentials{
+		Type:               "oauth",
+		GitHubToken:        "github-token",
+		CopilotAccessToken: "copilot-token",
+		ExpiresAt:          time.Now().Add(time.Hour).Unix(),
+		CreatedAt:          time.Now(),
+	}); err != nil {
+		t.Fatalf("SetCopilotOAuthCredentials failed: %v", err)
+	}
+
+	if err := cm.RemoveCopilotCredentials(); err != nil {
+		t.Fatalf("RemoveCopilotCredentials failed: %v", err)
+	}
+
+	hasOpenAI, err := cm.HasOpenAICredentials()
+	if err != nil {
+		t.Fatalf("HasOpenAICredentials failed: %v", err)
+	}
+	if !hasOpenAI {
+		t.Fatal("Expected OpenAI credentials to remain after removing Copilot credentials")
+	}
+}
@@ -10,6 +10,7 @@ import (
 	"io"
 	"net/http"
 	"net/url"
+	"strconv"
 	"strings"
 	"time"
 )
@@ -211,6 +212,262 @@ type OpenAIOAuthClient struct {
 	Scopes       string
 }

+// CopilotOAuthClient handles GitHub device-flow OAuth and exchanges the
+// GitHub token for a short-lived GitHub Copilot API token.
+//
+// The GitHub token comes from GitHub's OAuth device flow. It is then presented
+// to GitHub's internal Copilot token endpoint, which returns the bearer token
+// used by api.githubcopilot.com.
+//
+// All fields are exported so callers (and tests) can point the client at
+// alternative endpoints or shorten the timeouts.
+type CopilotOAuthClient struct {
+	// ClientID is sent as client_id on device-code and device-token requests.
+	ClientID      string
+	// DeviceURL is the endpoint StartDeviceFlow POSTs to for a device code.
+	DeviceURL     string
+	// TokenURL is the endpoint PollDeviceToken POSTs to while polling.
+	TokenURL      string
+	// CopilotURL is the endpoint RefreshCopilotToken GETs to obtain a Copilot token.
+	CopilotURL    string
+	// Scopes is sent verbatim as the "scope" form value when requesting a device code.
+	Scopes        string
+	// PollTimeout caps how long PollDeviceToken keeps polling overall.
+	PollTimeout   time.Duration
+	// ClientTimeout is the per-request timeout of each HTTP client the methods create.
+	ClientTimeout time.Duration
+}
+
+// CopilotDeviceCode contains data returned by GitHub's device-code endpoint.
+//
+// StartDeviceFlow rejects responses in which DeviceCode, UserCode, or
+// VerificationURI is empty.
+type CopilotDeviceCode struct {
+	// DeviceCode is sent back as device_code on every token poll.
+	DeviceCode      string `json:"device_code"`
+	// UserCode is the code shown to the user alongside VerificationURI.
+	UserCode        string `json:"user_code"`
+	// VerificationURI is the page where the user enters UserCode.
+	VerificationURI string `json:"verification_uri"`
+	// ExpiresIn is the device code's lifetime in seconds; PollDeviceToken
+	// ignores it when it is not positive.
+	ExpiresIn       int    `json:"expires_in"`
+	// Interval is the polling interval in seconds; non-positive values are
+	// replaced with five seconds.
+	Interval        int    `json:"interval"`
+}
+
+// NewCopilotOAuthClient returns a client preconfigured with GitHub's
+// device-code, device-token, and Copilot token endpoints, the read:user
+// scope, a 15-minute polling budget, and a 30-second per-request timeout.
+func NewCopilotOAuthClient() *CopilotOAuthClient {
+	client := CopilotOAuthClient{
+		PollTimeout:   15 * time.Minute,
+		ClientTimeout: 30 * time.Second,
+	}
+	client.ClientID = "Iv1.b507a08c87ecfe98"
+	client.Scopes = "read:user"
+	client.DeviceURL = "https://github.com/login/device/code"
+	client.TokenURL = "https://github.com/login/oauth/access_token"
+	client.CopilotURL = "https://api.github.com/copilot_internal/v2/token"
+	return &client
+}
+
+// StartDeviceFlow asks GitHub for a device code to begin browser login.
+//
+// The client ID and scopes are POSTed as a form to c.DeviceURL. A nil ctx is
+// treated as context.Background(). The response must be 200 OK and carry a
+// device code, user code, and verification URI; otherwise an error is
+// returned. When the response omits interval (or reports a non-positive
+// value) it is normalized to the documented five-second default.
+func (c *CopilotOAuthClient) StartDeviceFlow(ctx context.Context) (*CopilotDeviceCode, error) {
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	form := url.Values{}
+	form.Set("client_id", c.ClientID)
+	form.Set("scope", c.Scopes)
+
+	req, err := http.NewRequestWithContext(ctx, "POST", c.DeviceURL, strings.NewReader(form.Encode()))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create device-code request: %w", err)
+	}
+	req.Header.Set("Accept", "application/json")
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	httpClient := &http.Client{Timeout: c.ClientTimeout}
+	resp, err := httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to request device code: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	if status := resp.StatusCode; status != http.StatusOK {
+		// Best effort: include whatever body could be read in the error.
+		raw, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("device-code request failed with status %d: %s", status, string(raw))
+	}
+
+	result := new(CopilotDeviceCode)
+	if err := json.NewDecoder(resp.Body).Decode(result); err != nil {
+		return nil, fmt.Errorf("failed to decode device-code response: %w", err)
+	}
+
+	switch "" {
+	case result.DeviceCode, result.UserCode, result.VerificationURI:
+		return nil, fmt.Errorf("device-code response missing required fields")
+	}
+
+	if result.Interval <= 0 {
+		result.Interval = 5
+	}
+	return result, nil
+}
+
+// PollDeviceToken waits until the user authorizes the device code and returns
+// the resulting GitHub OAuth token.
+//
+// It follows GitHub's device-flow polling contract: authorization_pending keeps
+// polling, slow_down increases the interval, and polling stops at the earlier of
+// c.PollTimeout or the device-code expiry. A nil ctx is treated as
+// context.Background(); cancelling ctx while waiting returns ctx.Err().
+//
+// Any transport error, undecodable response, or error code other than
+// authorization_pending / slow_down ends polling immediately with an error.
+func (c *CopilotOAuthClient) PollDeviceToken(ctx context.Context, deviceCode *CopilotDeviceCode) (string, error) {
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	if deviceCode == nil || deviceCode.DeviceCode == "" {
+		return "", fmt.Errorf("device code missing")
+	}
+
+	// Overall deadline: the poll timeout, shortened to the device code's own
+	// expiry when that comes first.
+	deadline := time.Now().Add(c.PollTimeout)
+	if deviceCode.ExpiresIn > 0 {
+		expiresAt := time.Now().Add(time.Duration(deviceCode.ExpiresIn) * time.Second)
+		if expiresAt.Before(deadline) {
+			deadline = expiresAt
+		}
+	}
+
+	// Fall back to five seconds when the caller did not normalize Interval.
+	interval := time.Duration(deviceCode.Interval) * time.Second
+	if interval <= 0 {
+		interval = 5 * time.Second
+	}
+
+	for time.Now().Before(deadline) {
+		// Wait before every poll (including the first), but never sleep past
+		// the deadline.
+		wait := interval
+		if remaining := time.Until(deadline); remaining < wait {
+			wait = remaining
+		}
+		select {
+		case <-ctx.Done():
+			return "", ctx.Err()
+		case <-time.After(wait):
+		}
+
+		data := url.Values{
+			"client_id":   {c.ClientID},
+			"device_code": {deviceCode.DeviceCode},
+			"grant_type":  {"urn:ietf:params:oauth:grant-type:device_code"},
+		}
+
+		req, err := http.NewRequestWithContext(ctx, "POST", c.TokenURL, strings.NewReader(data.Encode()))
+		if err != nil {
+			return "", fmt.Errorf("failed to create device-token request: %w", err)
+		}
+		req.Header.Set("Accept", "application/json")
+		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+		resp, err := (&http.Client{Timeout: c.ClientTimeout}).Do(req)
+		if err != nil {
+			return "", fmt.Errorf("failed to poll device token: %w", err)
+		}
+
+		var tokenResp struct {
+			AccessToken string `json:"access_token"`
+			Error       string `json:"error"`
+			Description string `json:"error_description"`
+		}
+		// The body is closed explicitly rather than deferred because this
+		// runs inside a loop; a defer would hold every body open until return.
+		decodeErr := json.NewDecoder(resp.Body).Decode(&tokenResp)
+		_ = resp.Body.Close()
+		if decodeErr != nil {
+			return "", fmt.Errorf("failed to decode device-token response: %w", decodeErr)
+		}
+
+		// The status code is not checked up front; outcomes are told apart by
+		// the JSON payload, and the status only appears in the fallback error.
+		if tokenResp.AccessToken != "" {
+			return tokenResp.AccessToken, nil
+		}
+
+		switch tokenResp.Error {
+		case "authorization_pending":
+			// The user has not approved yet; poll again after the interval.
+			continue
+		case "slow_down":
+			// Back off by five more seconds for all subsequent polls.
+			interval += 5 * time.Second
+			continue
+		case "expired_token":
+			return "", fmt.Errorf("device code expired; restart login")
+		case "access_denied":
+			return "", fmt.Errorf("github login denied")
+		case "":
+			// Neither a token nor an error code was present in the response.
+			return "", fmt.Errorf("device-token request failed with status %d", resp.StatusCode)
+		default:
+			if tokenResp.Description != "" {
+				return "", fmt.Errorf("device-token request failed: %s: %s", tokenResp.Error, tokenResp.Description)
+			}
+			return "", fmt.Errorf("device-token request failed: %s", tokenResp.Error)
+		}
+	}
+
+	return "", fmt.Errorf("timed out waiting for github device authorization")
+}
+
+// ExchangeGitHubToken turns a GitHub OAuth token into Copilot API credentials.
+// The login flow calls it under this name for readability; the work is
+// delegated unchanged to RefreshCopilotToken.
+func (c *CopilotOAuthClient) ExchangeGitHubToken(ctx context.Context, githubToken string) (*CopilotCredentials, error) {
+	creds, err := c.RefreshCopilotToken(ctx, githubToken)
+	return creds, err
+}
+
+// RefreshCopilotToken obtains a fresh short-lived Copilot token from GitHub.
+//
+// githubToken is sent as "Authorization: token <githubToken>" in a GET to
+// c.CopilotURL. An empty githubToken is rejected before any request is made.
+// A nil ctx is treated as context.Background().
+//
+// GitHub may return expires_at as either a Unix timestamp or RFC3339 string.
+// parseCopilotExpiry handles both forms and falls back to a conservative
+// 20-minute lifetime when the field is absent or unrecognized.
+//
+// On success the returned credentials carry the GitHub token that was passed
+// in, the new Copilot token, its expiry, and the current time as CreatedAt.
+// A non-200 response yields an error containing the status code and at most
+// the first 4 KiB of the response body.
+func (c *CopilotOAuthClient) RefreshCopilotToken(ctx context.Context, githubToken string) (*CopilotCredentials, error) {
+	// Bound how much of an error response is copied into the error message.
+	const maxErrorBody = 4 << 10
+
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	// Without a token the request could only be rejected; fail fast instead
+	// of sending an empty Authorization credential.
+	if githubToken == "" {
+		return nil, fmt.Errorf("github token missing")
+	}
+
+	req, err := http.NewRequestWithContext(ctx, "GET", c.CopilotURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create copilot token request: %w", err)
+	}
+	req.Header.Set("Authorization", "token "+githubToken)
+	req.Header.Set("Accept", "application/json")
+	req.Header.Set("User-Agent", "kit")
+	req.Header.Set("X-GitHub-Api-Version", "2022-11-28")
+
+	resp, err := (&http.Client{Timeout: c.ClientTimeout}).Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to request copilot token: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: a read failure still leaves the status code in the error.
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
+		return nil, fmt.Errorf("copilot token request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	var tokenResp struct {
+		Token     string `json:"token"`
+		ExpiresAt any    `json:"expires_at"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&tokenResp); err != nil {
+		return nil, fmt.Errorf("failed to decode copilot token response: %w", err)
+	}
+	if tokenResp.Token == "" {
+		return nil, fmt.Errorf("copilot token response missing token")
+	}
+
+	// A zero expiry means "unknown"; assume a short lifetime so the token is
+	// refreshed early rather than used past its real expiry.
+	expiresAt := parseCopilotExpiry(tokenResp.ExpiresAt)
+	if expiresAt == 0 {
+		expiresAt = time.Now().Add(20 * time.Minute).Unix()
+	}
+
+	return &CopilotCredentials{
+		Type:               "oauth",
+		GitHubToken:        githubToken,
+		CopilotAccessToken: tokenResp.Token,
+		ExpiresAt:          expiresAt,
+		CreatedAt:          time.Now(),
+	}, nil
+}
+
+// parseCopilotExpiry normalizes GitHub's expires_at variants to a Unix second.
+//
+// Accepted forms are JSON numbers (float64, or json.Number when a decoder is
+// configured with UseNumber), native int/int64 values, and strings holding a
+// numeric timestamp or an RFC3339 time. Whitespace around string values is
+// ignored. It returns 0 for nil, unsupported types, unparseable strings, and
+// non-finite or out-of-range numbers; RefreshCopilotToken treats 0 as
+// "unknown" and substitutes a fallback lifetime.
+func parseCopilotExpiry(value any) int64 {
+	switch v := value.(type) {
+	case float64:
+		return copilotExpiryFromFloat(v)
+	case int64:
+		return v
+	case int:
+		return int64(v)
+	case json.Number:
+		return copilotExpiryFromString(v.String())
+	case string:
+		return copilotExpiryFromString(v)
+	}
+	return 0
+}
+
+// copilotExpiryFromString parses a textual expiry: integer seconds first, then
+// a floating-point number of seconds, then an RFC3339 timestamp.
+func copilotExpiryFromString(s string) int64 {
+	s = strings.TrimSpace(s)
+	if secs, err := strconv.ParseInt(s, 10, 64); err == nil {
+		return secs
+	}
+	if f, err := strconv.ParseFloat(s, 64); err == nil {
+		return copilotExpiryFromFloat(f)
+	}
+	if ts, err := time.Parse(time.RFC3339, s); err == nil {
+		return ts.Unix()
+	}
+	return 0
+}
+
+// copilotExpiryFromFloat truncates f to whole seconds. NaN, infinities, and
+// magnitudes of 1e18 or more are reported as 0 because converting them to
+// int64 is not well defined.
+func copilotExpiryFromFloat(f float64) int64 {
+	const limit = 1e18
+	// NaN fails both comparisons, so it falls through to the zero return.
+	if f > -limit && f < limit {
+		return int64(f)
+	}
+	return 0
+}
+
 // NewOpenAIOAuthClient creates a new OAuth client configured for OpenAI Codex OAuth.
 // This uses the public client ID for CLI applications with PKCE for security.
 func NewOpenAIOAuthClient() *OpenAIOAuthClient {
@@ -0,0 +1,124 @@
+package auth
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+)
+
+// TestCopilotStartDeviceFlow checks that StartDeviceFlow POSTs the client ID
+// and scope as form values and decodes the device-code response.
+func TestCopilotStartDeviceFlow(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// The handler runs on a server goroutine, where t.Fatalf must not be
+		// called. Record the failure with t.Errorf and answer 400 so the
+		// client call under test fails as well.
+		fail := func(format string, args ...any) {
+			t.Errorf(format, args...)
+			http.Error(w, "unexpected request", http.StatusBadRequest)
+		}
+		if r.Method != http.MethodPost {
+			fail("expected POST, got %s", r.Method)
+			return
+		}
+		if err := r.ParseForm(); err != nil {
+			fail("ParseForm failed: %v", err)
+			return
+		}
+		if r.Form.Get("client_id") != "client-id" {
+			fail("expected client id, got %q", r.Form.Get("client_id"))
+			return
+		}
+		if r.Form.Get("scope") != "read:user" {
+			fail("expected scope, got %q", r.Form.Get("scope"))
+			return
+		}
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"device_code":      "device-code",
+			"user_code":        "USER-CODE",
+			"verification_uri": "https://github.com/login/device",
+			"expires_in":       600,
+			"interval":         1,
+		})
+	}))
+	defer server.Close()
+
+	client := NewCopilotOAuthClient()
+	client.ClientID = "client-id"
+	client.DeviceURL = server.URL
+
+	code, err := client.StartDeviceFlow(context.Background())
+	if err != nil {
+		t.Fatalf("StartDeviceFlow failed: %v", err)
+	}
+	if code.DeviceCode != "device-code" || code.UserCode != "USER-CODE" || code.Interval != 1 {
+		t.Fatalf("unexpected device code: %#v", code)
+	}
+}
+
+// TestCopilotPollDeviceToken checks that PollDeviceToken keeps polling after
+// an authorization_pending response and returns the token from the next one.
+func TestCopilotPollDeviceToken(t *testing.T) {
+	polls := 0
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		polls++
+		// The handler runs on a server goroutine, where t.Fatalf must not be
+		// called. Record the failure with t.Errorf and answer 400 so the
+		// client call under test fails as well.
+		fail := func(format string, args ...any) {
+			t.Errorf(format, args...)
+			http.Error(w, "unexpected request", http.StatusBadRequest)
+		}
+		if r.Method != http.MethodPost {
+			fail("expected POST, got %s", r.Method)
+			return
+		}
+		if err := r.ParseForm(); err != nil {
+			fail("ParseForm failed: %v", err)
+			return
+		}
+		if r.Form.Get("grant_type") != "urn:ietf:params:oauth:grant-type:device_code" {
+			fail("unexpected grant type: %q", r.Form.Get("grant_type"))
+			return
+		}
+		// First poll: the user has not approved yet. Second poll: success.
+		if polls == 1 {
+			_ = json.NewEncoder(w).Encode(map[string]any{"error": "authorization_pending"})
+			return
+		}
+		_ = json.NewEncoder(w).Encode(map[string]any{"access_token": "github-token"})
+	}))
+	defer server.Close()
+
+	client := NewCopilotOAuthClient()
+	client.ClientID = "client-id"
+	client.TokenURL = server.URL
+	client.PollTimeout = 5 * time.Second
+	client.ClientTimeout = time.Second
+
+	token, err := client.PollDeviceToken(context.Background(), &CopilotDeviceCode{
+		DeviceCode: "device-code",
+		ExpiresIn:  10,
+		Interval:   1,
+	})
+	if err != nil {
+		t.Fatalf("PollDeviceToken failed: %v", err)
+	}
+	if token != "github-token" {
+		t.Fatalf("expected github-token, got %q", token)
+	}
+	if polls != 2 {
+		t.Fatalf("expected 2 polls, got %d", polls)
+	}
+}
+
+// TestCopilotRefreshToken checks that RefreshCopilotToken sends the expected
+// headers and maps the token response onto CopilotCredentials.
+func TestCopilotRefreshToken(t *testing.T) {
+	expiresAt := time.Now().Add(time.Hour).Unix()
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// The handler runs on a server goroutine, where t.Fatalf must not be
+		// called. Record the failure with t.Errorf and answer 400 so the
+		// client call under test fails as well.
+		fail := func(format string, args ...any) {
+			t.Errorf(format, args...)
+			http.Error(w, "unexpected request", http.StatusBadRequest)
+		}
+		if r.Method != http.MethodGet {
+			fail("expected GET, got %s", r.Method)
+			return
+		}
+		if r.Header.Get("Authorization") != "token github-token" {
+			fail("unexpected authorization header: %q", r.Header.Get("Authorization"))
+			return
+		}
+		if r.Header.Get("User-Agent") != "kit" {
+			fail("unexpected user agent: %q", r.Header.Get("User-Agent"))
+			return
+		}
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"token":      "copilot-token",
+			"expires_at": expiresAt,
+		})
+	}))
+	defer server.Close()
+
+	client := NewCopilotOAuthClient()
+	client.CopilotURL = server.URL
+
+	creds, err := client.RefreshCopilotToken(context.Background(), "github-token")
+	if err != nil {
+		t.Fatalf("RefreshCopilotToken failed: %v", err)
+	}
+	if creds.GitHubToken != "github-token" || creds.CopilotAccessToken != "copilot-token" {
+		t.Fatalf("unexpected credentials: %#v", creds)
+	}
+	if creds.ExpiresAt != expiresAt {
+		t.Fatalf("expected expires_at %d, got %d", expiresAt, creds.ExpiresAt)
+	}
+}
@@ -389,6 +389,30 @@ func roleLabel(role fantasy.MessageRole) string {
 	}
 }

+// skillContentMarkers are substrings that identify a message carrying
+// explicitly-activated skill content. Such messages are exempt from
+// compaction pruning per the agentskills.io spec (issue #65, gap #7): an
+// activated skill must remain in context verbatim instead of being folded
+// into a lossy summary.
+//
+// Matching is a plain, case-sensitive substring test, so any text part that
+// contains one of these opening-tag prefixes counts as skill content.
+// NOTE(review): a message that merely quotes "<skill>" would also match —
+// confirm that this over-inclusion is acceptable.
+var skillContentMarkers = []string{"<skill ", "<skill>", "<skill_content"}
+
+// isProtectedMessage reports whether any text part of msg contains one of
+// skillContentMarkers, i.e. whether msg carries explicitly-activated skill
+// content that must survive compaction unchanged. Non-text parts are ignored.
+func isProtectedMessage(msg fantasy.Message) bool {
+	for i := range msg.Content {
+		if textPart, isText := msg.Content[i].(fantasy.TextPart); isText && hasSkillMarker(textPart.Text) {
+			return true
+		}
+	}
+	return false
+}
+
+// hasSkillMarker reports whether text contains any entry of skillContentMarkers.
+func hasSkillMarker(text string) bool {
+	for i := range skillContentMarkers {
+		if strings.Contains(text, skillContentMarkers[i]) {
+			return true
+		}
+	}
+	return false
+}
+
 // serializeMessages converts a slice of fantasy messages into a plain-text
 // representation suitable for sending to the summarisation LLM. Tool result
 // text is truncated to maxToolResultChars to keep the summarisation request
@@ -518,6 +542,14 @@ func Compact(

 	newMessages := make([]fantasy.Message, 0, 1+len(recentMessages))
 	newMessages = append(newMessages, summaryMessage)
+	// Carry forward any explicitly-activated skill content from the
+	// summarised range verbatim — skill instructions must not be lost to
+	// compaction (issue #65, gap #7).
+	for _, msg := range oldMessages {
+		if isProtectedMessage(msg) {
+			newMessages = append(newMessages, msg)
+		}
+	}
 	newMessages = append(newMessages, recentMessages...)

 	compactedTokens := EstimateMessageTokens(newMessages)
@@ -439,3 +439,25 @@ func TestSortedKeys_Empty(t *testing.T) {
 		t.Errorf("sortedKeys(nil) = %v, want nil", got)
 	}
 }
+
+// ---------------------------------------------------------------------------
+// Skill-content protection (issue #65, gap #7)
+// ---------------------------------------------------------------------------
+
+// TestIsProtectedMessage verifies that messages containing a skill tag are
+// protected from compaction while ordinary text is not.
+func TestIsProtectedMessage(t *testing.T) {
+	type expectation struct {
+		text string
+		want bool
+	}
+	table := []expectation{
+		{text: `<skill name="foo" location="/x">body</skill>`, want: true},
+		{text: `<skill_content name="foo">body</skill_content>`, want: true},
+		{text: "just a normal message", want: false},
+		{text: "talking about skills in general", want: false},
+	}
+	for i := range table {
+		tc := table[i]
+		got := isProtectedMessage(makeTextMessage(fantasy.MessageRoleUser, tc.text))
+		if got == tc.want {
+			continue
+		}
+		t.Errorf("isProtectedMessage(%q) = %v, want %v", tc.text, got, tc.want)
+	}
+}
@@ -6,6 +6,7 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"sync"

 	"github.com/spf13/viper"
 	"gopkg.in/yaml.v3"
@@ -22,6 +23,45 @@ type MCPServerConfig struct {
 	AllowedTools  []string          `json:"allowedTools,omitempty" yaml:"allowedTools,omitempty"`
 	ExcludedTools []string          `json:"excludedTools,omitempty" yaml:"excludedTools,omitempty"`

+	// OAuth configuration for remote servers that don't support dynamic
+	// client registration (e.g. GitHub). When OAuthClientID is set, it is
+	// passed directly to the transport's OAuthConfig instead of relying on
+	// dynamic registration.
+	OAuthClientID     string   `json:"oauthClientId,omitempty" yaml:"oauthClientId,omitempty"`
+	OAuthClientSecret string   `json:"oauthClientSecret,omitempty" yaml:"oauthClientSecret,omitempty"`
+	OAuthScopes       []string `json:"oauthScopes,omitempty" yaml:"oauthScopes,omitempty"`
+
+	// NoOAuth disables OAuth transport configuration for this server, even
+	// when the connection pool has an auth handler. Use this for public MCP
+	// servers (e.g. PubMed) that don't require authentication. Without this
+	// flag, the pool would attach OAuth transport to every remote server,
+	// causing proactive dynamic-client-registration attempts that fail on
+	// servers that don't support it.
+	NoOAuth bool `json:"noOAuth,omitempty" yaml:"noOAuth,omitempty"`
+
+	// TasksMode controls when this server's tools/call requests are augmented
+	// with MCP task metadata (turning a synchronous call into an asynchronous,
+	// pollable job — see https://modelcontextprotocol.io/specification/2025-11-25/basic/utilities/tasks).
+	//
+	// Valid values:
+	//   - "" or "auto": (default) augment requests with task metadata only
+	//     when the server advertises tasks/toolCalls capability during initialize.
+	//   - "never":      never augment — every tool call is synchronous, regardless
+	//     of server capability.
+	//   - "always":     always augment, even when the server didn't advertise
+	//     task support. The server may still respond synchronously; this just
+	//     opts in unconditionally on the client side.
+	//
+	// In all modes, when the server returns a CreateTaskResult the client polls
+	// tasks/get / tasks/result until the task reaches a terminal state.
+	TasksMode string `json:"tasksMode,omitempty" yaml:"tasksMode,omitempty"`
+
+	// InProcessServer holds a live *server.MCPServer for in-process transport.
+	// When set (and Type is "inprocess"), the connection pool creates an
+	// in-process client instead of spawning a subprocess or making HTTP calls.
+	// This field is never serialized — it is only used programmatically via the SDK.
+	InProcessServer any `json:"-" yaml:"-"`
+
 	// Legacy fields for backward compatibility
 	Transport string         `json:"transport,omitempty"`
 	Args      []string       `json:"args,omitempty"`
@@ -35,13 +75,18 @@ type MCPServerConfig struct {
 func (s *MCPServerConfig) UnmarshalJSON(data []byte) error {
 	// First try to unmarshal as the new format
 	type newFormat struct {
-		Type          string            `json:"type"`
-		Command       []string          `json:"command,omitempty"`
-		Environment   map[string]string `json:"environment,omitempty"`
-		URL           string            `json:"url,omitempty"`
-		Headers       []string          `json:"headers,omitempty"`
-		AllowedTools  []string          `json:"allowedTools,omitempty" yaml:"allowedTools,omitempty"`
-		ExcludedTools []string          `json:"excludedTools,omitempty" yaml:"excludedTools,omitempty"`
+		Type              string            `json:"type"`
+		Command           []string          `json:"command,omitempty"`
+		Environment       map[string]string `json:"environment,omitempty"`
+		URL               string            `json:"url,omitempty"`
+		Headers           []string          `json:"headers,omitempty"`
+		AllowedTools      []string          `json:"allowedTools,omitempty" yaml:"allowedTools,omitempty"`
+		ExcludedTools     []string          `json:"excludedTools,omitempty" yaml:"excludedTools,omitempty"`
+		OAuthClientID     string            `json:"oauthClientId,omitempty" yaml:"oauthClientId,omitempty"`
+		OAuthClientSecret string            `json:"oauthClientSecret,omitempty" yaml:"oauthClientSecret,omitempty"`
+		OAuthScopes       []string          `json:"oauthScopes,omitempty" yaml:"oauthScopes,omitempty"`
+		NoOAuth           bool              `json:"noOAuth,omitempty" yaml:"noOAuth,omitempty"`
+		TasksMode         string            `json:"tasksMode,omitempty" yaml:"tasksMode,omitempty"`
 	}

 	// Also try legacy format
@@ -54,6 +99,7 @@ func (s *MCPServerConfig) UnmarshalJSON(data []byte) error {
 		Headers       []string       `json:"headers,omitempty"`
 		AllowedTools  []string       `json:"allowedTools,omitempty" yaml:"allowedTools,omitempty"`
 		ExcludedTools []string       `json:"excludedTools,omitempty" yaml:"excludedTools,omitempty"`
+		TasksMode     string         `json:"tasksMode,omitempty" yaml:"tasksMode,omitempty"`
 	}

 	// Try new format first
@@ -66,6 +112,11 @@ func (s *MCPServerConfig) UnmarshalJSON(data []byte) error {
 		s.Headers = newConfig.Headers
 		s.AllowedTools = newConfig.AllowedTools
 		s.ExcludedTools = newConfig.ExcludedTools
+		s.OAuthClientID = newConfig.OAuthClientID
+		s.OAuthClientSecret = newConfig.OAuthClientSecret
+		s.OAuthScopes = newConfig.OAuthScopes
+		s.NoOAuth = newConfig.NoOAuth
+		s.TasksMode = newConfig.TasksMode
 		return nil
 	}

@@ -86,6 +137,7 @@ func (s *MCPServerConfig) UnmarshalJSON(data []byte) error {
 	s.Headers = legacyConfig.Headers
 	s.AllowedTools = legacyConfig.AllowedTools
 	s.ExcludedTools = legacyConfig.ExcludedTools
+	s.TasksMode = legacyConfig.TasksMode

 	// Infer type from legacy format for better compatibility
 	// Only set Type when it doesn't change existing transport behavior
@@ -157,20 +209,41 @@ type Theme struct {
 	Markdown MarkdownThemeConfig `json:"markdown,omitzero" yaml:"markdown,omitempty"`
 }

+// GenerationParams defines generation parameter defaults that can be attached
+// to individual models. These act as model-level defaults — CLI flags and
+// global config values take precedence when explicitly set.
+//
+// Every field is optional and omitted from serialized output when empty.
+// NOTE(review): the numeric fields are pointers, presumably so that "not set"
+// (nil) can be told apart from an explicit zero — confirm against the code
+// that merges these defaults.
+type GenerationParams struct {
+	// MaxTokens is the maximum number of tokens in a response.
+	MaxTokens        *int     `json:"maxTokens,omitempty" yaml:"maxTokens,omitempty"`
+	// Temperature controls sampling randomness.
+	Temperature      *float32 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
+	// TopP is the nucleus-sampling threshold.
+	TopP             *float32 `json:"topP,omitempty" yaml:"topP,omitempty"`
+	// TopK is the top-k sampling limit.
+	TopK             *int32   `json:"topK,omitempty" yaml:"topK,omitempty"`
+	// FrequencyPenalty is the frequency-penalty setting.
+	FrequencyPenalty *float32 `json:"frequencyPenalty,omitempty" yaml:"frequencyPenalty,omitempty"`
+	// PresencePenalty is the presence-penalty setting.
+	PresencePenalty  *float32 `json:"presencePenalty,omitempty" yaml:"presencePenalty,omitempty"`
+	// StopSequences lists custom stop sequences.
+	StopSequences    []string `json:"stopSequences,omitempty" yaml:"stopSequences,omitempty"`
+	// ThinkingLevel is the thinking level for the model, e.g. "high".
+	ThinkingLevel    string   `json:"thinkingLevel,omitempty" yaml:"thinkingLevel,omitempty"`
+	// SystemPrompt is the system prompt text; the sample config notes that a
+	// file path may be given instead.
+	SystemPrompt     string   `json:"systemPrompt,omitempty" yaml:"systemPrompt,omitempty"`
+}
+
 // CustomModelConfig defines a custom model that can be used with custom/custom
 // or other custom/ prefixed models. These models are loaded from the config file
 // and merged into the custom provider in the model registry.
 type CustomModelConfig struct {
-	Name        string      `json:"name" yaml:"name"`
-	BaseURL     string      `json:"baseUrl,omitempty" yaml:"baseUrl,omitempty"`
-	APIKey      string      `json:"apiKey,omitempty" yaml:"apiKey,omitempty"`
-	Family      string      `json:"family,omitempty" yaml:"family,omitempty"`
-	Attachment  bool        `json:"attachment,omitempty" yaml:"attachment,omitempty"`
-	Reasoning   bool        `json:"reasoning,omitempty" yaml:"reasoning,omitempty"`
-	Temperature bool        `json:"temperature,omitempty" yaml:"temperature,omitempty"`
-	Knowledge   string      `json:"knowledge,omitempty" yaml:"knowledge,omitempty"`
-	Cost        CostConfig  `json:"cost" yaml:"cost"`
-	Limit       LimitConfig `json:"limit" yaml:"limit"`
+	Name         string      `json:"name" yaml:"name"`
+	BaseURL      string      `json:"baseUrl,omitempty" yaml:"baseUrl,omitempty"`
+	APIKey       string      `json:"apiKey,omitempty" yaml:"apiKey,omitempty"`
+	APIModelName string      `json:"apiModelName,omitempty" yaml:"apiModelName,omitempty"`
+	Family       string      `json:"family,omitempty" yaml:"family,omitempty"`
+	Attachment   bool        `json:"attachment,omitempty" yaml:"attachment,omitempty"`
+	Reasoning    bool        `json:"reasoning,omitempty" yaml:"reasoning,omitempty"`
+	Temperature  bool        `json:"temperature,omitempty" yaml:"temperature,omitempty"`
+	Knowledge    string      `json:"knowledge,omitempty" yaml:"knowledge,omitempty"`
+	Cost         CostConfig  `json:"cost" yaml:"cost"`
+	Limit        LimitConfig `json:"limit" yaml:"limit"`
+
+	// Generation parameter defaults for this model.
+	// These are applied when the user hasn't explicitly set the corresponding
+	// CLI flag or global config value.
+	Params GenerationParams `json:"params,omitzero" yaml:"params,omitempty"`
 }

 // CostConfig defines the pricing for a custom model.
@@ -219,6 +292,12 @@ type Config struct {

 	// Custom model definitions (under custom/ provider)
 	CustomModels map[string]CustomModelConfig `json:"customModels,omitempty" yaml:"customModels,omitempty"`
+
+	// Per-model generation parameter overrides. Keys are "provider/model" strings
+	// (e.g. "anthropic/claude-sonnet-4-5-20250929", "openai/gpt-4o"). These
+	// settings act as model-level defaults — CLI flags and global config values
+	// take precedence when explicitly set.
+	ModelSettings map[string]GenerationParams `json:"modelSettings,omitempty" yaml:"modelSettings,omitempty"`
 }

 // GetTransportType returns the transport type for the server config, mapping
@@ -237,11 +316,18 @@ func (s *MCPServerConfig) GetTransportType() string {
 			return "stdio"
 		case "remote":
 			return "streamable"
+		case "inprocess":
+			return "inprocess"
 		default:
 			return s.Type
 		}
 	}

+	// Programmatic in-process server detection.
+	if s.InProcessServer != nil {
+		return "inprocess"
+	}
+
 	// Backward compatibility: infer transport type
 	if len(s.Command) > 0 {
 		return "stdio"
@@ -261,6 +347,17 @@ func (c *Config) Validate() error {
 			return fmt.Errorf("server %s: allowedTools and excludedTools are mutually exclusive", serverName)
 		}

+		// Reject unknown tasksMode values up front so a typo (e.g. "alwasy")
+		// fails loud here instead of being silently downgraded to "auto" by
+		// the runtime parser. Comparison is case-insensitive to match
+		// tools.ParseTaskMode.
+		switch strings.ToLower(strings.TrimSpace(serverConfig.TasksMode)) {
+		case "", "auto", "never", "always":
+			// ok
+		default:
+			return fmt.Errorf("server %s: invalid tasksMode %q (expected one of: auto, never, always)", serverName, serverConfig.TasksMode)
+		}
+
 		transport := serverConfig.GetTransportType()
 		switch transport {
 		case "stdio":
@@ -272,8 +369,12 @@ func (c *Config) Validate() error {
 			if serverConfig.URL == "" {
 				return fmt.Errorf("server %s: url is required for %s transport", serverName, transport)
 			}
+		case "inprocess":
+			if serverConfig.InProcessServer == nil {
+				return fmt.Errorf("server %s: InProcessServer is required for inprocess transport", serverName)
+			}
 		default:
-			return fmt.Errorf("server %s: unsupported transport type '%s'. Supported types: stdio, sse, streamable", serverName, transport)
+			return fmt.Errorf("server %s: unsupported transport type '%s'. Supported types: stdio, sse, streamable, inprocess", serverName, transport)
 		}
 	}
 	return nil
@@ -367,7 +468,7 @@ mcpServers:
 # debug: false                                 # Enable debug logging
 # system-prompt: "/path/to/system-prompt.txt" # System prompt text file

-# Model generation parameters (all optional)
+# Model generation parameters (all optional, apply globally to all models)
 # max-tokens: 4096                             # Maximum tokens in response
 # temperature: 0.7                             # Randomness (0.0-1.0)
 # top-p: 0.95                                  # Nucleus sampling (0.0-1.0)
@@ -376,9 +477,67 @@ mcpServers:
 # presence-penalty: 0.0                         # Penalize present tokens (0.0-2.0)
 # stop-sequences: ["Human:", "Assistant:"]     # Custom stop sequences

+# Per-model generation parameter overrides (apply to specific models)
+# These act as model-level defaults — CLI flags and global settings above take precedence.
+# Keys are "provider/model" strings matching the model you use.
+# modelSettings:
+#   anthropic/claude-sonnet-4-5-20250929:
+#     temperature: 0.3
+#     maxTokens: 8192
+#   openai/gpt-4o:
+#     temperature: 0.7
+#     topP: 0.95
+#     topK: 40
+#     frequencyPenalty: 0.1
+#     presencePenalty: 0.1
+#   anthropic/claude-opus-4-6:
+#     thinkingLevel: "high"
+#     maxTokens: 16384
+#     systemPrompt: "You are a deep reasoning assistant."  # or a file path
+
+# Skills configuration (all optional)
+# no-skills: false                          # Set to true to disable all skill loading
+# skill:                                    # Explicit skill files/dirs (disables auto-discovery)
+#   - "/path/to/skill.md"
+# skills-dir: "/path/to/skills"            # Scan this directory directly for skills (overrides auto-discovery)
+# skill-disable:                            # Hide skills from the model catalog by name (still usable via /skill:)
+#   - "some-skill"
+#
+# Skill files follow the agentskills.io spec. A SKILL.md frontmatter block
+# supports these fields:
+#   name: my-skill                          # required
+#   description: Use when ...               # required (basis for model discovery)
+#   license: MIT                            # optional SPDX identifier
+#   compatibility: claude-code, cursor      # optional targeted-environment note
+#   allowed-tools: read, bash               # optional (experimental) tool restriction
+#   disable-model-invocation: false         # optional; true hides from the catalog
+#   metadata:                               # optional arbitrary key/value pairs
+#     author: you
+#   tags: [example]                         # Kit extension
+#   when: on-demand                         # Kit extension
+
 # API Configuration (can also use environment variables)
 # provider-api-key: "your-api-key"         # API key for OpenAI, Anthropic, or Google
 # provider-url: "https://api.openai.com/v1" # Base URL for OpenAI, Anthropic, or Ollama
+
+# Custom model definitions (under custom/ provider)
+# customModels:
+#   my-local-llama:
+#     name: "Local Llama 3"
+#     baseUrl: "http://localhost:8080/v1"
+#     family: "llama"
+#     temperature: true
+#     cost:
+#       input: 0.0
+#       output: 0.0
+#     limit:
+#       context: 131072
+#       output: 8192
+#     params:                              # Generation parameter defaults for this model
+#       temperature: 0.8
+#       topP: 0.95
+#       topK: 40
+#       systemPrompt: "You are a helpful local assistant."
 `

 	_, err = file.WriteString(content)
@@ -411,7 +570,7 @@ func FilepathOr[T any](key string, value *T) error {
 				absPath = filepath.Join(home, absPath[2:])
 			}
 			if !filepath.IsAbs(absPath) {
-				base := configPath
+				base := GetConfigPath()
 				if base == "" {
 					fmt.Fprintf(os.Stderr, "unable to build relative path to config.")
 					os.Exit(1)
@@ -438,11 +597,24 @@ func FilepathOr[T any](key string, value *T) error {
 	return nil
 }

-var configPath string
+var (
+	configPathMu sync.RWMutex
+	configPath   string
+)

 // SetConfigPath sets the configuration file path for resolving relative paths
 // in configuration values. This should be called when the configuration file
-// location is known.
+// location is known. It is safe for concurrent use.
 func SetConfigPath(path string) {
+	configPathMu.Lock()
+	defer configPathMu.Unlock()
 	configPath = path
 }
+
+// GetConfigPath reports the configuration file path most recently stored
+// through SetConfigPath. The value is read while holding configPathMu's read
+// lock, so it is safe for concurrent use.
+func GetConfigPath() string {
+	configPathMu.RLock()
+	current := configPath
+	configPathMu.RUnlock()
+	return current
+}
@@ -6,6 +6,8 @@ import (
 	"path/filepath"
 	"strings"
 	"testing"
+
+	"gopkg.in/yaml.v3"
 )

 func TestMCPServerConfig_NewFormat(t *testing.T) {
@@ -203,6 +205,9 @@ func TestEnsureConfigExists(t *testing.T) {
 		"type: \"local\"",
 		"type: \"remote\"",
 		"Core tools",
+		"# Skills configuration",
+		"no-skills:",
+		"skills-dir:",
 	}

 	for _, expected := range expectedSections {
@@ -542,3 +547,175 @@ func TestEnsureConfigExistsWhenFileExists(t *testing.T) {
 		t.Error("Existing config file was modified when it shouldn't have been")
 	}
 }
+
+// TestMCPServerConfig_OAuthFields_JSON decodes a remote server entry that
+// carries all three OAuth fields and checks each decoded value.
+func TestMCPServerConfig_OAuthFields_JSON(t *testing.T) {
+	const payload = `{
+		"type": "remote",
+		"url": "https://api.githubcopilot.com/mcp/",
+		"oauthClientId": "Ov23liXXXXXXXXXXXXXX",
+		"oauthClientSecret": "secret123",
+		"oauthScopes": ["read:user", "repo"]
+	}`
+
+	var got MCPServerConfig
+	if err := json.Unmarshal([]byte(payload), &got); err != nil {
+		t.Fatalf("Failed to unmarshal: %v", err)
+	}
+
+	if got.Type != "remote" {
+		t.Errorf("Expected type 'remote', got %q", got.Type)
+	}
+	if got.URL != "https://api.githubcopilot.com/mcp/" {
+		t.Errorf("Expected URL, got %q", got.URL)
+	}
+	if got.OAuthClientID != "Ov23liXXXXXXXXXXXXXX" {
+		t.Errorf("Expected OAuthClientID 'Ov23liXXXXXXXXXXXXXX', got %q", got.OAuthClientID)
+	}
+	if got.OAuthClientSecret != "secret123" {
+		t.Errorf("Expected OAuthClientSecret 'secret123', got %q", got.OAuthClientSecret)
+	}
+
+	// Both scopes must be present and in the order they were written.
+	scopes := got.OAuthScopes
+	if !(len(scopes) == 2 && scopes[0] == "read:user" && scopes[1] == "repo") {
+		t.Errorf("Expected OAuthScopes [read:user, repo], got %v", scopes)
+	}
+}
+
+// TestMCPServerConfig_OAuthFields_YAML decodes the YAML form of a remote
+// server entry and checks the type, client ID and scope list.
+func TestMCPServerConfig_OAuthFields_YAML(t *testing.T) {
+	const doc = `
+type: remote
+url: https://api.githubcopilot.com/mcp/
+oauthClientId: "Ov23liXXXXXXXXXXXXXX"
+oauthScopes:
+  - read:user
+  - repo
+`
+
+	var got MCPServerConfig
+	if err := yaml.Unmarshal([]byte(doc), &got); err != nil {
+		t.Fatalf("Failed to unmarshal YAML: %v", err)
+	}
+
+	if got.Type != "remote" {
+		t.Errorf("Expected type 'remote', got %q", got.Type)
+	}
+	if got.OAuthClientID != "Ov23liXXXXXXXXXXXXXX" {
+		t.Errorf("Expected OAuthClientID 'Ov23liXXXXXXXXXXXXXX', got %q", got.OAuthClientID)
+	}
+
+	// Both scopes must be present and in the order they were written.
+	scopes := got.OAuthScopes
+	if !(len(scopes) == 2 && scopes[0] == "read:user" && scopes[1] == "repo") {
+		t.Errorf("Expected OAuthScopes [read:user, repo], got %v", scopes)
+	}
+}
+
+// TestMCPServerConfig_OAuthFields_Omitted checks backward compatibility: an
+// entry without any OAuth keys still decodes and leaves those fields empty.
+func TestMCPServerConfig_OAuthFields_Omitted(t *testing.T) {
+	const payload = `{
+		"type": "remote",
+		"url": "https://example.com/mcp"
+	}`
+
+	var got MCPServerConfig
+	if err := json.Unmarshal([]byte(payload), &got); err != nil {
+		t.Fatalf("Failed to unmarshal: %v", err)
+	}
+
+	if id := got.OAuthClientID; id != "" {
+		t.Errorf("Expected empty OAuthClientID, got %q", id)
+	}
+	if secret := got.OAuthClientSecret; secret != "" {
+		t.Errorf("Expected empty OAuthClientSecret, got %q", secret)
+	}
+	if scopes := got.OAuthScopes; len(scopes) != 0 {
+		t.Errorf("Expected empty OAuthScopes, got %v", scopes)
+	}
+}
+
+// TestMCPServerConfig_TasksMode_NewFormat checks that tasksMode is decoded
+// from the type/url config shape.
+func TestMCPServerConfig_TasksMode_NewFormat(t *testing.T) {
+	const payload = `{
+		"type": "remote",
+		"url": "https://my-mcp-server.com",
+		"tasksMode": "always"
+	}`
+
+	var got MCPServerConfig
+	err := json.Unmarshal([]byte(payload), &got)
+	if err != nil {
+		t.Fatalf("Failed to unmarshal: %v", err)
+	}
+	if mode := got.TasksMode; mode != "always" {
+		t.Errorf("expected TasksMode 'always', got %q", mode)
+	}
+}
+
+// TestMCPServerConfig_TasksMode_LegacyFormat checks that tasksMode is also
+// decoded from the older command/args config shape, so users on that shape
+// can opt in without migrating.
+func TestMCPServerConfig_TasksMode_LegacyFormat(t *testing.T) {
+	const payload = `{
+		"command": "npx",
+		"args": ["@modelcontextprotocol/server-filesystem", "/path"],
+		"tasksMode": "never"
+	}`
+
+	var got MCPServerConfig
+	err := json.Unmarshal([]byte(payload), &got)
+	if err != nil {
+		t.Fatalf("Failed to unmarshal: %v", err)
+	}
+	if mode := got.TasksMode; mode != "never" {
+		t.Errorf("expected TasksMode 'never', got %q", mode)
+	}
+}
+
+// TestMCPServerConfig_TasksMode_DefaultEmpty checks that an entry without a
+// tasksMode key decodes with the field left empty; downstream code resolves
+// the empty value to "auto" via tools.ParseTaskMode.
+func TestMCPServerConfig_TasksMode_DefaultEmpty(t *testing.T) {
+	const payload = `{"type":"remote","url":"https://x.example"}`
+
+	var got MCPServerConfig
+	err := json.Unmarshal([]byte(payload), &got)
+	if err != nil {
+		t.Fatalf("Failed to unmarshal: %v", err)
+	}
+	if mode := got.TasksMode; mode != "" {
+		t.Errorf("expected default TasksMode to be empty, got %q", mode)
+	}
+}
+
+// TestConfig_Validate_TasksMode covers Config.Validate's handling of the
+// per-server TasksMode: empty and known values pass, anything else fails
+// with an error naming the server and the offending value.
+func TestConfig_Validate_TasksMode(t *testing.T) {
+	// configWith builds a single remote server config; only the server name
+	// and its TasksMode vary between cases.
+	configWith := func(server, mode string) *Config {
+		return &Config{
+			MCPServers: map[string]MCPServerConfig{
+				server: {Type: "remote", URL: "https://x.example", TasksMode: mode},
+			},
+		}
+	}
+
+	t.Run("empty is valid", func(t *testing.T) {
+		if err := configWith("a", "").Validate(); err != nil {
+			t.Errorf("empty TasksMode should validate, got %v", err)
+		}
+	})
+
+	t.Run("known values are valid", func(t *testing.T) {
+		// Includes upper-case and space-padded spellings.
+		accepted := []string{"auto", "never", "always", "AUTO", " always "}
+		for _, mode := range accepted {
+			if err := configWith("a", mode).Validate(); err != nil {
+				t.Errorf("TasksMode=%q should validate, got %v", mode, err)
+			}
+		}
+	})
+
+	t.Run("typo is rejected with a clear error", func(t *testing.T) {
+		err := configWith("buildbot", "alwasy").Validate()
+		if err == nil {
+			t.Fatal("expected validation error for invalid TasksMode")
+		}
+		// The message has to carry both the server name and the bad value
+		// so the user knows where to look.
+		msg := err.Error()
+		namesServer := strings.Contains(msg, "buildbot")
+		namesValue := strings.Contains(msg, `"alwasy"`)
+		if !namesServer || !namesValue {
+			t.Errorf("error %q should mention both server name and bad value", msg)
+		}
+	})
+}
@@ -0,0 +1,33 @@
+package config
+
+import (
+	"sync"
+	"testing"
+)
+
+// TestConfigPathConcurrentAccess hammers SetConfigPath and GetConfigPath from
+// many goroutines at once. It is meant to be run with -race, which reports
+// the data race on the package-level configPath that the mutex guards against
+// (concurrent kit.New() calls discovering a .kit.yml).
+func TestConfigPathConcurrentAccess(t *testing.T) {
+	t.Cleanup(func() { SetConfigPath("") })
+
+	const workers = 32
+	var wg sync.WaitGroup
+	for i := 0; i < workers; i++ {
+		// One writer and one reader per iteration.
+		wg.Add(2)
+		go func() {
+			defer wg.Done()
+			SetConfigPath("/tmp/kit.yml")
+		}()
+		go func() {
+			defer wg.Done()
+			_ = GetConfigPath()
+		}()
+	}
+	wg.Wait()
+
+	// With every goroutine finished, a plain set/get must round-trip.
+	SetConfigPath("/tmp/final.yml")
+	got := GetConfigPath()
+	if got != "/tmp/final.yml" {
+		t.Fatalf("GetConfigPath() = %q, want /tmp/final.yml", got)
+	}
+}
@@ -7,32 +7,48 @@ import (
 	"github.com/spf13/viper"
 )

-// LoadAndValidateConfig loads configuration from viper, fixes environment variable
-// casing issues, and validates the configuration. Returns an error if loading or
-// validation fails.
+// LoadAndValidateConfig loads configuration from the process-global viper
+// store, fixes environment variable casing issues, and validates the
+// configuration. Returns an error if loading or validation fails.
+//
+// This is a convenience wrapper around [LoadAndValidateConfigFrom] using the
+// shared global store; it is retained for the CLI and other callers that rely
+// on viper's process-global state.
 func LoadAndValidateConfig() (*Config, error) {
+	return LoadAndValidateConfigFrom(viper.GetViper())
+}
+
+// LoadAndValidateConfigFrom loads configuration from the supplied per-instance
+// store, fixes environment variable casing issues, and validates the
+// configuration. When v is nil, the process-global store is used. Threading an
+// explicit store lets each Kit instance own an isolated configuration without
+// clobbering other instances in the same process.
+func LoadAndValidateConfigFrom(v *viper.Viper) (*Config, error) {
+	if v == nil {
+		v = viper.GetViper()
+	}
 	config := &Config{
 		MCPServers: make(map[string]MCPServerConfig),
 	}
-	if err := viper.Unmarshal(config); err != nil {
-		return nil, fmt.Errorf("failed to unmarshal config: %v", err)
+	if err := v.Unmarshal(config); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal config: %w", err)
 	}

 	// Fix environment variable case sensitivity issue
 	// Viper lowercases all keys, but we need to preserve the original case for environment variables
-	fixEnvironmentCase(config)
+	fixEnvironmentCase(v, config)

 	if err := config.Validate(); err != nil {
-		return nil, fmt.Errorf("invalid config: %v", err)
+		return nil, fmt.Errorf("invalid config: %w", err)
 	}

 	return config, nil
 }

 // fixEnvironmentCase fixes the case of environment variable keys that were lowercased by Viper
-func fixEnvironmentCase(config *Config) {
+func fixEnvironmentCase(v *viper.Viper, config *Config) {
 	// Get the raw config data from viper
-	rawConfig := viper.AllSettings()
+	rawConfig := v.AllSettings()

 	// Check if we have mcpServers in the raw config
 	if mcpServersRaw, ok := rawConfig["mcpservers"]; ok {
@@ -56,9 +56,3 @@ func (e *EnvSubstituter) SubstituteEnvVars(content string) (string, error) {

 	return result, nil
 }
-
-// HasEnvVars checks if content contains environment variable patterns (${env://...}).
-// This is useful for determining if substitution is needed before processing.
-func HasEnvVars(content string) bool {
-	return envVarPattern.MatchString(content)
-}
@@ -187,41 +187,3 @@ func TestEnvSubstituter_SubstituteEnvVars(t *testing.T) {
 		})
 	}
 }
-
-func TestHasEnvVars(t *testing.T) {
-	tests := []struct {
-		name     string
-		content  string
-		expected bool
-	}{
-		{
-			name:     "has env vars",
-			content:  `{"token": "${env://GITHUB_TOKEN}"}`,
-			expected: true,
-		},
-		{
-			name:     "has env vars with default",
-			content:  `{"debug": "${env://DEBUG:-false}"}`,
-			expected: true,
-		},
-		{
-			name:     "no env vars",
-			content:  `{"name": "${username}", "normal": "value"}`,
-			expected: false,
-		},
-		{
-			name:     "empty content",
-			content:  "",
-			expected: false,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := HasEnvVars(tt.content)
-			if result != tt.expected {
-				t.Errorf("Expected %v, got %v", tt.expected, result)
-			}
-		})
-	}
-}
@@ -19,10 +19,18 @@ import (
 // It receives tool call ID, tool name, output chunk, and whether it's stderr.
 type ToolOutputCallback func(toolCallID, toolName, chunk string, isStderr bool)

+// PasswordPromptCallback is the signature for password prompts.
+// It receives a prompt message and returns the entered password and whether
+// the prompt was cancelled. executeBash treats a cancelled prompt or an empty
+// password as an error and does not run the command.
+type PasswordPromptCallback func(prompt string) (password string, cancelled bool)
+
 // contextKey is a custom type for context keys to avoid collisions.
 type contextKey string

-const toolOutputCallbackKey contextKey = "toolOutputCallback"
+const (
+	toolOutputCallbackKey contextKey = "toolOutputCallback"
+	sudoPasswordKey       contextKey = "sudoPassword"
+	passwordPromptKey     contextKey = "passwordPrompt"
+)

 // ContextWithToolOutputCallback returns a new context with the tool output callback set.
 func ContextWithToolOutputCallback(ctx context.Context, callback ToolOutputCallback) context.Context {
@@ -37,6 +45,28 @@ func toolOutputCallbackFromContext(ctx context.Context) ToolOutputCallback {
 	return nil
 }

+// ContextWithPasswordPrompt derives a context from ctx that carries callback
+// under passwordPromptKey. It lets the TUI show a modal password prompt when
+// sudo needs a password.
+func ContextWithPasswordPrompt(ctx context.Context, callback PasswordPromptCallback) context.Context {
+	withPrompt := context.WithValue(ctx, passwordPromptKey, callback)
+	return withPrompt
+}
+
+// passwordPromptFromContext looks up the password prompt callback stored
+// under passwordPromptKey. It returns nil when the context holds no value of
+// type PasswordPromptCallback for that key.
+func passwordPromptFromContext(ctx context.Context) PasswordPromptCallback {
+	prompt, ok := ctx.Value(passwordPromptKey).(PasswordPromptCallback)
+	if !ok {
+		return nil
+	}
+	return prompt
+}
+
+// sudoPasswordFromContext looks up the sudo password stored under
+// sudoPasswordKey. It returns "" when the context holds no string for that key.
+func sudoPasswordFromContext(ctx context.Context) string {
+	// A failed assertion leaves password at its zero value, "", which is
+	// also the "not set" result.
+	password, _ := ctx.Value(sudoPasswordKey).(string)
+	return password
+}
+
 const defaultBashTimeout = 120 * time.Second
 const maxBashTimeout = 600 * time.Second

@@ -73,6 +103,57 @@ func NewBashTool(opts ...ToolOption) fantasy.AgentTool {
 	}
 }

+// sudoCommandRe matches sudo invocations that need to be rewritten for -S mode.
+// A match must begin at the start of the command text or right after a shell
+// separator (&, |, ;, ||, &&), may include one NAME=value assignment before
+// the command word, and ends on the whole word "sudo" (case-insensitive).
+// A "sudo" that merely follows other words, such as inside an echo argument,
+// does not match.
+var sudoCommandRe = regexp.MustCompile(`(?i)(^|[&|;|]|\|\||&&)\s*(\w+=\S+\s+)?\bsudo\b`)
+
+// truncateCommand shortens cmd for display so the result is at most maxLen
+// bytes. Commands that already fit are returned unchanged; longer ones are
+// cut and suffixed with "...". The cut never lands inside a multi-byte UTF-8
+// sequence. A maxLen too small to hold any text plus the ellipsis yields only
+// as much of the ellipsis as fits (nothing for maxLen <= 0) instead of
+// panicking on a negative slice bound.
+func truncateCommand(cmd string, maxLen int) string {
+	const ellipsis = "..."
+	if len(cmd) <= maxLen {
+		return cmd
+	}
+	if maxLen <= 0 {
+		return ""
+	}
+	if maxLen <= len(ellipsis) {
+		return ellipsis[:maxLen]
+	}
+
+	cut := maxLen - len(ellipsis)
+	// Back up past UTF-8 continuation bytes (10xxxxxx) so the kept prefix
+	// ends on a rune boundary. cut < len(cmd) holds because cmd did not fit.
+	for cut > 0 && cmd[cut]&0xC0 == 0x80 {
+		cut--
+	}
+	return cmd[:cut] + ellipsis
+}
+
+// rewriteSudoForStdin rewrites every sudo invocation matched by sudoCommandRe
+// so that sudo reads the password from stdin (-S) with an empty prompt:
+//
+//	sudo cmd  →  sudo -S -p '' cmd
+//
+// An invocation whose first argument already starts with -S is left as is, as
+// is a match whose command word is not spelled in lower case. The command is
+// returned unchanged when nothing matches.
+func rewriteSudoForStdin(command string) string {
+	const stdinFlags = " -S -p ''"
+
+	matches := sudoCommandRe.FindAllStringIndex(command, -1)
+	if matches == nil {
+		return command
+	}
+
+	// Walk the matches back to front: inserting text only shifts what comes
+	// after the insertion point, so the offsets of earlier matches stay valid.
+	result := command
+	for i := len(matches) - 1; i >= 0; i-- {
+		// The pattern ends on the word "sudo", so the end of the match is
+		// exactly where the flags belong. Anchoring on the end, rather than
+		// searching the match for the first "sudo", keeps an assignment
+		// prefix that itself contains "sudo" (e.g. FOO=sudo) from receiving
+		// the flags.
+		end := matches[i][1]
+
+		// The pattern is case-insensitive; only a lower-case "sudo" is rewritten.
+		if !strings.HasSuffix(result[:end], "sudo") {
+			continue
+		}
+
+		// The invocation already reads its password from stdin.
+		if strings.HasPrefix(strings.TrimLeft(result[end:], " \t"), "-S") {
+			continue
+		}
+
+		result = result[:end] + stdinFlags + result[end:]
+	}
+
+	return result
+}
+
 func executeBash(ctx context.Context, call fantasy.ToolCall, workDir string) (fantasy.ToolResponse, error) {
 	var args bashArgs
 	if err := parseArgs(call.Input, &args); err != nil {
@@ -97,7 +178,47 @@ func executeBash(ctx context.Context, call fantasy.ToolCall, workDir string) (fa
 	cmdCtx, cancel := context.WithTimeout(ctx, timeout)
 	defer cancel()

-	cmd := exec.CommandContext(cmdCtx, "bash", "-c", args.Command)
+	// Check for sudo password in context or environment
+	sudoPassword := sudoPasswordFromContext(ctx)
+	if sudoPassword == "" {
+		sudoPassword = os.Getenv("SUDO_PASSWORD")
+	}
+	command := args.Command
+
+	// If command contains sudo and we don't have a password, check if sudo needs one
+	if sudoPassword == "" && sudoCommandRe.MatchString(command) {
+		// Check if sudo credentials are cached using sudo -n (non-interactive)
+		testCmd := exec.CommandContext(cmdCtx, "sudo", "-n", "true")
+		testCmd.Dir = workDir
+		if err := testCmd.Run(); err != nil {
+			// Sudo needs a password - try to prompt via callback
+			if promptCallback := passwordPromptFromContext(ctx); promptCallback != nil {
+				pw, cancelled := promptCallback("Sudo password required for: " + truncateCommand(args.Command, 60))
+				if cancelled {
+					return fantasy.NewTextErrorResponse("sudo password prompt cancelled"), nil
+				}
+				if pw == "" {
+					return fantasy.NewTextErrorResponse("no sudo password provided"), nil
+				}
+				sudoPassword = pw
+				command = rewriteSudoForStdin(command)
+			} else {
+				// No callback available - return error with helpful message
+				return fantasy.NewTextErrorResponse(
+					"This command requires sudo access. " +
+						"Please run 'sudo -v' in your terminal first to cache credentials, " +
+						"or set the SUDO_PASSWORD environment variable."), nil
+			}
+		}
+		// Credentials are cached or password was provided, proceed
+	}
+
+	// If we have a sudo password, rewrite the command to use sudo -S
+	if sudoPassword != "" && sudoCommandRe.MatchString(command) {
+		command = rewriteSudoForStdin(command)
+	}
+
+	cmd := exec.CommandContext(cmdCtx, "bash", "-c", command)
 	if workDir != "" {
 		cmd.Dir = workDir
 	}
@@ -115,44 +236,94 @@ func executeBash(ctx context.Context, call fantasy.ToolCall, workDir string) (fa

 	if outputCallback != nil {
 		// Streaming mode: use pipes to capture output as it arrives
-		return executeBashStreaming(cmdCtx, call, cmd, outputCallback)
+		return executeBashStreaming(cmdCtx, call, cmd, outputCallback, sudoPassword)
 	}

 	// Non-streaming mode: collect all output at once (original behavior)
-	return executeBashBuffered(cmdCtx, call, cmd)
+	return executeBashBuffered(cmdCtx, call, cmd, sudoPassword)
+}
+
+// setupBashPipes wires up cmd's stdout and stderr pipes, adds a stdin pipe
+// when a sudo password is supplied, starts the command, and then feeds the
+// password (followed by a newline) to stdin from a separate goroutine that
+// closes the pipe afterwards.
+//
+// On success it returns the two readers and a nil errResp. On any setup
+// failure the readers are nil and errResp points at the error response the
+// caller should return directly.
+func setupBashPipes(cmd *exec.Cmd, sudoPassword string) (stdout, stderr io.Reader, errResp *fantasy.ToolResponse) {
+	// fail packages a message in the shape every error path returns.
+	fail := func(msg string) (io.Reader, io.Reader, *fantasy.ToolResponse) {
+		resp := fantasy.NewTextErrorResponse(msg)
+		return nil, nil, &resp
+	}
+
+	outPipe, err := cmd.StdoutPipe()
+	if err != nil {
+		return fail("failed to create stdout pipe")
+	}
+	errPipe, err := cmd.StderrPipe()
+	if err != nil {
+		return fail("failed to create stderr pipe")
+	}
+
+	// stdin is only attached when there is a password to hand to sudo.
+	var pwPipe io.WriteCloser
+	if sudoPassword != "" {
+		if pwPipe, err = cmd.StdinPipe(); err != nil {
+			return fail("failed to create stdin pipe")
+		}
+	}
+
+	if err = cmd.Start(); err != nil {
+		return fail(fmt.Sprintf("failed to start command: %v", err))
+	}
+
+	if pwPipe != nil && sudoPassword != "" {
+		go func() {
+			defer func() { _ = pwPipe.Close() }()
+			_, _ = io.WriteString(pwPipe, sudoPassword+"\n")
+		}()
+	}
+
+	return outPipe, errPipe, nil
+}
+
+// interpretBashExit translates the error returned by cmd.Wait() into an exit
+// code. A nil error is exit code 0 and an *exec.ExitError yields its own exit
+// code. Any other error produces a "command timed out" response when cmdCtx
+// ended with context.DeadlineExceeded, and exit code 0 otherwise.
+//
+// errResp is non-nil only in the timeout case, where the caller should return
+// it directly.
+//
+// NOTE(review): the *exec.ExitError case is tested before the deadline, so a
+// wait error of that type is reported through its exit code even if the
+// deadline has also passed — confirm this ordering is intended.
+func interpretBashExit(waitErr error, cmdCtx context.Context) (exitCode int, errResp *fantasy.ToolResponse) {
+	exitErr, isExit := waitErr.(*exec.ExitError)
+	switch {
+	case waitErr == nil:
+		return 0, nil
+	case isExit:
+		return exitErr.ExitCode(), nil
+	case cmdCtx.Err() == context.DeadlineExceeded:
+		resp := fantasy.NewTextErrorResponse("command timed out")
+		return 0, &resp
+	default:
+		return 0, nil
+	}
 }

 // executeBashBuffered collects all output before returning (original behavior).
 // It uses explicit pipes (not cmd.Stdout) so that cmd.WaitDelay can forcibly
 // close them when grandchild processes hold pipe handles open after the
 // direct child exits.
-func executeBashBuffered(cmdCtx context.Context, call fantasy.ToolCall, cmd *exec.Cmd) (fantasy.ToolResponse, error) {
-	stdoutPipe, err := cmd.StdoutPipe()
-	if err != nil {
-		return fantasy.NewTextErrorResponse("failed to create stdout pipe"), nil
-	}
-	stderrPipe, err := cmd.StderrPipe()
-	if err != nil {
-		return fantasy.NewTextErrorResponse("failed to create stderr pipe"), nil
-	}
-
-	if err := cmd.Start(); err != nil {
-		return fantasy.NewTextErrorResponse(fmt.Sprintf("failed to start command: %v", err)), nil
+func executeBashBuffered(cmdCtx context.Context, _ fantasy.ToolCall, cmd *exec.Cmd, sudoPassword string) (fantasy.ToolResponse, error) {
+	stdoutPipe, stderrPipe, errResp := setupBashPipes(cmd, sudoPassword)
+	if errResp != nil {
+		return *errResp, nil
 	}

 	// Read pipes concurrently
 	var wg sync.WaitGroup
 	var stdout, stderr strings.Builder
-	var stdoutErr, stderrErr error

 	wg.Add(2)
 	go func() {
 		defer wg.Done()
-		_, stdoutErr = io.Copy(&stdout, stdoutPipe)
+		_, _ = io.Copy(&stdout, stdoutPipe)
 	}()
 	go func() {
 		defer wg.Done()
-		_, stderrErr = io.Copy(&stderr, stderrPipe)
+		_, _ = io.Copy(&stderr, stderrPipe)
 	}()

 	// Wait for the process to exit first. cmd.WaitDelay ensures that if
@@ -163,37 +334,19 @@ func executeBashBuffered(cmdCtx context.Context, call fantasy.ToolCall, cmd *exe
 	// Wait for pipe readers to finish draining.
 	wg.Wait()

-	// Ignore pipe read errors caused by WaitDelay force-closing —
-	// we still have whatever was read before the close.
-	_ = stdoutErr
-	_ = stderrErr
-
-	exitCode := 0
-	if waitErr != nil {
-		if exitErr, ok := waitErr.(*exec.ExitError); ok {
-			exitCode = exitErr.ExitCode()
-		} else if cmdCtx.Err() == context.DeadlineExceeded {
-			return fantasy.NewTextErrorResponse("command timed out"), nil
-		}
+	exitCode, errResp := interpretBashExit(waitErr, cmdCtx)
+	if errResp != nil {
+		return *errResp, nil
 	}

 	return buildBashResponse(stdout.String(), stderr.String(), exitCode)
 }

 // executeBashStreaming streams output as it arrives via the callback.
-func executeBashStreaming(cmdCtx context.Context, call fantasy.ToolCall, cmd *exec.Cmd, outputCallback ToolOutputCallback) (fantasy.ToolResponse, error) {
-	stdoutPipe, err := cmd.StdoutPipe()
-	if err != nil {
-		return fantasy.NewTextErrorResponse("failed to create stdout pipe"), nil
-	}
-	stderrPipe, err := cmd.StderrPipe()
-	if err != nil {
-		return fantasy.NewTextErrorResponse("failed to create stderr pipe"), nil
-	}
-
-	// Start command execution
-	if err := cmd.Start(); err != nil {
-		return fantasy.NewTextErrorResponse(fmt.Sprintf("failed to start command: %v", err)), nil
+func executeBashStreaming(cmdCtx context.Context, call fantasy.ToolCall, cmd *exec.Cmd, outputCallback ToolOutputCallback, sudoPassword string) (fantasy.ToolResponse, error) {
+	stdoutPipe, stderrPipe, errResp := setupBashPipes(cmd, sudoPassword)
+	if errResp != nil {
+		return *errResp, nil
 	}

 	// Stream stdout and stderr concurrently
@@ -230,20 +383,16 @@ func executeBashStreaming(cmdCtx context.Context, call fantasy.ToolCall, cmd *ex
 	// Wait for the process to exit. cmd.WaitDelay ensures that if pipes
 	// remain open (held by grandchild processes), they'll be forcibly closed
 	// after the grace period, which unblocks the scanners above.
-	err = cmd.Wait()
+	waitErr := cmd.Wait()

 	// Wait for the pipe readers to finish draining. This will complete
 	// quickly since cmd.Wait() (with WaitDelay) has already ensured
 	// the pipes are closed.
 	wg.Wait()

-	exitCode := 0
-	if err != nil {
-		if exitErr, ok := err.(*exec.ExitError); ok {
-			exitCode = exitErr.ExitCode()
-		} else if cmdCtx.Err() == context.DeadlineExceeded {
-			return fantasy.NewTextErrorResponse("command timed out"), nil
-		}
+	exitCode, errResp := interpretBashExit(waitErr, cmdCtx)
+	if errResp != nil {
+		return *errResp, nil
 	}

 	return buildBashResponse(strings.Join(stdoutChunks, "\n"), strings.Join(stderrChunks, "\n"), exitCode)
@@ -127,3 +127,72 @@ func TestBash_EmptyCommand(t *testing.T) {
 		t.Fatal("expected error for empty command")
 	}
 }
+
+// TestRewriteSudoForStdin checks that rewriteSudoForStdin inserts -S -p ''
+// after sudo at the start of a command, after an env assignment, in a
+// pipeline and after &&, and that it leaves other commands untouched.
+func TestRewriteSudoForStdin(t *testing.T) {
+	// Each case pairs a shell command with the text expected after rewriting.
+	cases := []struct {
+		name string
+		in   string
+		want string
+	}{
+		{"simple sudo", "sudo apt update", "sudo -S -p '' apt update"},
+		{
+			"sudo with env var",
+			"DEBIAN_FRONTEND=noninteractive sudo apt update",
+			"DEBIAN_FRONTEND=noninteractive sudo -S -p '' apt update",
+		},
+		{
+			"sudo in pipeline",
+			"echo test | sudo tee /etc/test.conf",
+			"echo test | sudo -S -p '' tee /etc/test.conf",
+		},
+		{"sudo after &&", "apt update && sudo apt upgrade", "apt update && sudo -S -p '' apt upgrade"},
+		{"already has -S flag", "sudo -S apt update", "sudo -S apt update"},
+		{"no sudo", "apt update && apt upgrade", "apt update && apt upgrade"},
+		{"sudo in string (should not match)", "echo 'use sudo carefully'", "echo 'use sudo carefully'"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := rewriteSudoForStdin(tc.in); got != tc.want {
+				t.Errorf("rewriteSudoForStdin(%q) = %q, want %q", tc.in, got, tc.want)
+			}
+		})
+	}
+}
+
+// TestSudoPasswordFromContext checks both lookup outcomes of
+// sudoPasswordFromContext: key present and key absent.
+func TestSudoPasswordFromContext(t *testing.T) {
+	// A password stored under sudoPasswordKey comes back verbatim.
+	withPassword := context.WithValue(context.Background(), sudoPasswordKey, "secret123")
+	if got := sudoPasswordFromContext(withPassword); got != "secret123" {
+		t.Errorf("expected password 'secret123', got %q", got)
+	}
+
+	// A context without the key yields the empty string.
+	if got := sudoPasswordFromContext(context.Background()); got != "" {
+		t.Errorf("expected empty password, got %q", got)
+	}
+}
@@ -21,12 +21,9 @@ type Edit struct {
 }

 // editArgs holds the arguments for the edit tool.
-// Supports both single-edit mode (old_text/new_text) and multi-edit mode (edits array).
 type editArgs struct {
-	Path    string `json:"path"`
-	OldText string `json:"old_text"` // Single-edit mode
-	NewText string `json:"new_text"` // Single-edit mode
-	Edits   []Edit `json:"edits"`    // Multi-edit mode
+	Path  string `json:"path"`
+	Edits []Edit `json:"edits"`
 }

 // replacement represents a normalized edit ready for processing.
@@ -52,20 +49,12 @@ func NewEditTool(opts ...ToolOption) fantasy.AgentTool {
 	return &coreTool{
 		info: fantasy.ToolInfo{
 			Name:        "edit",
-			Description: "Edit a file by replacing exact text. Supports single edit via old_text/new_text, or multiple edits via the edits array. All edits in the array are matched against the original file content (non-incremental) and must be non-overlapping.",
+			Description: "Edit a file by replacing exact text. All edits in the array are matched against the original file content (non-incremental) and must be non-overlapping.",
 			Parameters: map[string]any{
 				"path": map[string]any{
 					"type":        "string",
 					"description": "Path to the file to edit (relative or absolute)",
 				},
-				"old_text": map[string]any{
-					"type":        "string",
-					"description": "Exact text to find and replace (single-edit mode). Must not be used with 'edits' array.",
-				},
-				"new_text": map[string]any{
-					"type":        "string",
-					"description": "New text to replace the old text with (single-edit mode). Must not be used with 'edits' array.",
-				},
 				"edits": map[string]any{
 					"type":        "array",
 					"description": "Array of edits for multi-region replacement. Each edit must have unique, non-overlapping old_text. All matches are against the original file content.",
@@ -85,7 +74,7 @@ func NewEditTool(opts ...ToolOption) fantasy.AgentTool {
 					},
 				},
 			},
-			Required: []string{"path"},
+			Required: []string{"path", "edits"},
 		},
 		handler: func(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
 			return executeEdit(ctx, call, cfg.WorkDir)
@@ -94,6 +83,9 @@ func NewEditTool(opts ...ToolOption) fantasy.AgentTool {
 }

 func executeEdit(ctx context.Context, call fantasy.ToolCall, workDir string) (fantasy.ToolResponse, error) {
+	if err := ctx.Err(); err != nil {
+		return fantasy.ToolResponse{}, err
+	}
 	var args editArgs
 	if err := parseArgs(call.Input, &args); err != nil {
 		return fantasy.NewTextErrorResponse("failed to parse arguments: " + err.Error()), nil
@@ -163,36 +155,11 @@ func executeEdit(ctx context.Context, call fantasy.ToolCall, workDir string) (fa
 }

 // normalizeEditInput validates and normalizes the edit input.
-// Returns error if both single-edit and multi-edit modes are used.
 func normalizeEditInput(args editArgs) ([]replacement, error) {
-	singleMode := args.OldText != "" || args.NewText != ""
-	multiMode := len(args.Edits) > 0
-
-	if singleMode && multiMode {
-		return nil, fmt.Errorf("cannot use old_text/new_text together with edits array")
+	if len(args.Edits) == 0 {
+		return nil, fmt.Errorf("edits array is required and must not be empty")
 	}

-	if !singleMode && !multiMode {
-		return nil, fmt.Errorf("must provide either old_text/new_text or edits array")
-	}
-
-	if singleMode {
-		if args.OldText == "" {
-			return nil, fmt.Errorf("old_text is required when using single-edit mode")
-		}
-		if args.NewText == "" {
-			return nil, fmt.Errorf("new_text is required when using single-edit mode")
-		}
-		return []replacement{{
-			oldText:     strings.ReplaceAll(args.OldText, "\r\n", "\n"),
-			newText:     strings.ReplaceAll(args.NewText, "\r\n", "\n"),
-			originalOld: args.OldText,
-			originalNew: args.NewText,
-			index:       0,
-		}}, nil
-	}
-
-	// Multi-edit mode
 	var reps []replacement
 	for i, edit := range args.Edits {
 		if edit.OldText == "" {
@@ -389,9 +389,11 @@ func TestExecuteEdit_ExactMatch(t *testing.T) {
 	writeFileOrFail(t, path, original)

 	input, _ := json.Marshal(editArgs{
-		Path:    path,
-		OldText: "fmt.Println(\"hello\")",
-		NewText: "fmt.Println(\"world\")",
+		Path: path,
+		Edits: []Edit{{
+			OldText: "fmt.Println(\"hello\")",
+			NewText: "fmt.Println(\"world\")",
+		}},
 	})

 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, dir)
@@ -426,9 +428,11 @@ func TestExecuteEdit_ExactMatch_DoesNotCorruptRest(t *testing.T) {
 	target := lines[49]
 	replacement := "REPLACED_LINE_50"
 	input, _ := json.Marshal(editArgs{
-		Path:    path,
-		OldText: target,
-		NewText: replacement,
+		Path: path,
+		Edits: []Edit{{
+			OldText: target,
+			NewText: replacement,
+		}},
 	})

 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, dir)
@@ -470,9 +474,11 @@ func TestExecuteEdit_FuzzyMatch_TrailingWhitespace(t *testing.T) {

 	// Search without trailing whitespace (common LLM behavior)
 	input, _ := json.Marshal(editArgs{
-		Path:    path,
-		OldText: "func foo() {\n\treturn 1\n}",
-		NewText: "func foo() {\n\treturn 2\n}",
+		Path: path,
+		Edits: []Edit{{
+			OldText: "func foo() {\n\treturn 1\n}",
+			NewText: "func foo() {\n\treturn 2\n}",
+		}},
 	})

 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, dir)
@@ -519,9 +525,11 @@ func TestExecuteEdit_FuzzyMatch_DoesNotCorruptRest(t *testing.T) {
 	search := strings.Repeat("x", 10) + "\n" + strings.Repeat("x", 10)
 	// But this matches lines 1-2, 2-3, etc. — should fail due to ambiguity.
 	input, _ := json.Marshal(editArgs{
-		Path:    path,
-		OldText: search,
-		NewText: "REPLACED",
+		Path: path,
+		Edits: []Edit{{
+			OldText: search,
+			NewText: "REPLACED",
+		}},
 	})

 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, dir)
@@ -546,9 +554,11 @@ func TestExecuteEdit_MultipleMatches_Fails(t *testing.T) {
 	writeFileOrFail(t, path, "hello\nworld\nhello\n")

 	input, _ := json.Marshal(editArgs{
-		Path:    path,
-		OldText: "hello",
-		NewText: "goodbye",
+		Path: path,
+		Edits: []Edit{{
+			OldText: "hello",
+			NewText: "goodbye",
+		}},
 	})

 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, dir)
@@ -575,9 +585,11 @@ func TestExecuteEdit_NoMatch_Fails(t *testing.T) {
 	writeFileOrFail(t, path, "hello world\n")

 	input, _ := json.Marshal(editArgs{
-		Path:    path,
-		OldText: "nonexistent text",
-		NewText: "replacement",
+		Path: path,
+		Edits: []Edit{{
+			OldText: "nonexistent text",
+			NewText: "replacement",
+		}},
 	})

 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, dir)
@@ -601,9 +613,11 @@ func TestExecuteEdit_CRLFNormalization(t *testing.T) {
 	writeFileOrFail(t, path, "line1\r\nline2\r\nline3\r\n")

 	input, _ := json.Marshal(editArgs{
-		Path:    path,
-		OldText: "line2",
-		NewText: "LINE2",
+		Path: path,
+		Edits: []Edit{{
+			OldText: "line2",
+			NewText: "LINE2",
+		}},
 	})

 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, dir)
@@ -622,8 +636,10 @@ func TestExecuteEdit_CRLFNormalization(t *testing.T) {

 func TestExecuteEdit_MissingPath(t *testing.T) {
 	input, _ := json.Marshal(editArgs{
-		OldText: "x",
-		NewText: "y",
+		Edits: []Edit{{
+			OldText: "x",
+			NewText: "y",
+		}},
 	})
 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, "")
 	if err != nil {
@@ -636,9 +652,11 @@ func TestExecuteEdit_MissingPath(t *testing.T) {

 func TestExecuteEdit_NonexistentFile(t *testing.T) {
 	input, _ := json.Marshal(editArgs{
-		Path:    "/tmp/nonexistent_edit_test_file_12345.go",
-		OldText: "x",
-		NewText: "y",
+		Path: "/tmp/nonexistent_edit_test_file_12345.go",
+		Edits: []Edit{{
+			OldText: "x",
+			NewText: "y",
+		}},
 	})
 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, "")
 	if err != nil {
@@ -661,9 +679,11 @@ func TestExecuteEdit_DiffContainsHunkHeader(t *testing.T) {
 	writeFileOrFail(t, path, strings.Join(lines, "\n")+"\n")

 	input, _ := json.Marshal(editArgs{
-		Path:    path,
-		OldText: "line_10_content",
-		NewText: "REPLACED",
+		Path: path,
+		Edits: []Edit{{
+			OldText: "line_10_content",
+			NewText: "REPLACED",
+		}},
 	})

 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, dir)
@@ -684,9 +704,11 @@ func TestExecuteEdit_MetadataContainsFileDiffs(t *testing.T) {
 	writeFileOrFail(t, path, "old content\n")

 	input, _ := json.Marshal(editArgs{
-		Path:    path,
-		OldText: "old content",
-		NewText: "new content",
+		Path: path,
+		Edits: []Edit{{
+			OldText: "old content",
+			NewText: "new content",
+		}},
 	})

 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, dir)
@@ -905,18 +927,14 @@ func TestExecuteEdit_MultiEdit_EmptyArray(t *testing.T) {
 	}
 }

-func TestExecuteEdit_MultiEdit_MixedWithSingleMode(t *testing.T) {
+func TestExecuteEdit_EmptyEditsArray_Fails(t *testing.T) {
 	dir := t.TempDir()
-	path := filepath.Join(dir, "mixed.txt")
+	path := filepath.Join(dir, "empty.txt")
 	writeFileOrFail(t, path, "hello\n")

-	input, _ := json.Marshal(map[string]any{
-		"path":     path,
-		"old_text": "hello",
-		"new_text": "HELLO",
-		"edits": []Edit{
-			{OldText: "hello", NewText: "HI"},
-		},
+	input, _ := json.Marshal(editArgs{
+		Path:  path,
+		Edits: []Edit{},
 	})

 	resp, err := executeEdit(t.Context(), fantasy.ToolCall{Input: string(input)}, dir)
@@ -924,10 +942,10 @@ func TestExecuteEdit_MultiEdit_MixedWithSingleMode(t *testing.T) {
 		t.Fatalf("executeEdit error: %v", err)
 	}
 	if !resp.IsError {
-		t.Error("expected error when mixing single and multi-edit modes")
+		t.Error("expected error for empty edits array")
 	}
-	if !strings.Contains(resp.Content, "cannot use") {
-		t.Errorf("expected 'cannot use' in error, got: %s", resp.Content)
+	if !strings.Contains(resp.Content, "required") {
+		t.Errorf("expected 'required' in error, got: %s", resp.Content)
 	}
 }

@@ -42,6 +42,9 @@ func NewLsTool(opts ...ToolOption) fantasy.AgentTool {
 }

 func executeLs(ctx context.Context, call fantasy.ToolCall, workDir string) (fantasy.ToolResponse, error) {
+	if err := ctx.Err(); err != nil {
+		return fantasy.ToolResponse{}, err
+	}
 	var args lsArgs
 	_ = parseArgs(call.Input, &args) // optional args

@@ -47,6 +47,9 @@ func NewReadTool(opts ...ToolOption) fantasy.AgentTool {
 }

 func executeRead(ctx context.Context, call fantasy.ToolCall, workDir string) (fantasy.ToolResponse, error) {
+	if err := ctx.Err(); err != nil {
+		return fantasy.ToolResponse{}, err
+	}
 	var args readArgs
 	if err := parseArgs(call.Input, &args); err != nil {
 		return fantasy.NewTextErrorResponse("path parameter is required"), nil
@@ -86,7 +86,7 @@ Example use cases:
 				},
 				"model": map[string]any{
 					"type":        "string",
-					"description": "Optional model override (e.g. 'anthropic/claude-haiku-3-5-20241022' for faster/cheaper tasks)",
+					"description": "Optional model override. Empty string uses the current model.",
 				},
 				"system_prompt": map[string]any{
 					"type":        "string",
@@ -94,7 +94,7 @@ Example use cases:
 				},
 				"timeout_seconds": map[string]any{
 					"type":        "number",
-					"description": "Maximum execution time in seconds (default: 300, max: 1800)",
+					"description": "Maximum execution time in seconds (default: 300, max: 1800, minimum recommended: 240)",
 				},
 			},
 			Required: []string{"task"},
@@ -41,6 +41,9 @@ func NewWriteTool(opts ...ToolOption) fantasy.AgentTool {
 }

 func executeWrite(ctx context.Context, call fantasy.ToolCall, workDir string) (fantasy.ToolResponse, error) {
+	if err := ctx.Err(); err != nil {
+		return fantasy.ToolResponse{}, err
+	}
 	var args writeArgs
 	if err := parseArgs(call.Input, &args); err != nil {
 		return fantasy.NewTextErrorResponse("path and content parameters are required"), nil
@@ -0,0 +1,234 @@
+package extbridge
+
+import (
+	"context"
+
+	"github.com/mark3labs/kit/internal/extensions"
+	kit "github.com/mark3labs/kit/pkg/kit"
+)
+
+// BaseContext builds the headless, TUI-independent half of an
+// extensions.Context by delegating to kitInstance and package-level kit
+// helpers: data access, state, options, model/tool management, completions,
+// subagents, tree navigation, skills, template parsing, and model resolution.
+//
+// Callers overlay their UI-specific fields on the returned value (print
+// routes, widgets, prompts, editor, TUI-aware SetModel/ReloadExtensions, ...):
+// cmd/extension_context.go for the interactive TUI and
+// internal/acpserver/session.go for headless ACP mode. With the shared half
+// kept here, a new data-access Context field only has to be wired once.
+//
+// ctx is used only for subagent spawns; pass a long-lived context (not a
+// per-request one) so later spawns aren't cancelled prematurely.
+func BaseContext(ctx context.Context, kitInstance *kit.Kit) extensions.Context {
+	return extensions.Context{
+		// -------------------------------------------------------------------
+		// Data access
+		// -------------------------------------------------------------------
+		GetContextStats: func() extensions.ContextStats {
+			stats := kitInstance.GetContextStats()
+			return extensions.ContextStats{
+				EstimatedTokens: stats.EstimatedTokens,
+				ContextLimit:    stats.ContextLimit,
+				UsagePercent:    stats.UsagePercent,
+				MessageCount:    stats.MessageCount,
+			}
+		},
+		GetMessages: func() []extensions.SessionMessage {
+			return kitInstance.Extensions().GetSessionMessages()
+		},
+		GetSessionPath: func() string {
+			return kitInstance.GetSessionPath()
+		},
+		AppendEntry: func(entryType, data string) (string, error) {
+			return kitInstance.Extensions().AppendEntry(entryType, data)
+		},
+		GetEntries: func(entryType string) []extensions.ExtensionEntry {
+			return kitInstance.Extensions().GetEntries(entryType)
+		},
+
+		// -------------------------------------------------------------------
+		// Extension state
+		// -------------------------------------------------------------------
+		SetState: func(key, value string) {
+			kitInstance.Extensions().SetState(key, value)
+		},
+		GetState: func(key string) (string, bool) {
+			return kitInstance.Extensions().GetState(key)
+		},
+		DeleteState: func(key string) {
+			kitInstance.Extensions().DeleteState(key)
+		},
+		ListState: func() []string {
+			return kitInstance.Extensions().ListState()
+		},
+
+		// -------------------------------------------------------------------
+		// Options, model, and tool management
+		// -------------------------------------------------------------------
+		GetOption: func(name string) string {
+			return kitInstance.Extensions().GetOption(name)
+		},
+		SetOption: func(name, value string) {
+			kitInstance.Extensions().SetOption(name, value)
+		},
+		// Headless model switch; the interactive TUI replaces this with a variant
+		// that additionally notifies the TUI and refreshes the usage tracker.
+		SetModel: func(modelString string) error {
+			prev := kitInstance.Extensions().GetContext().Model
+			if err := kitInstance.SetModel(context.Background(), modelString); err != nil {
+				return err
+			}
+			kitInstance.Extensions().UpdateContextModel(modelString)
+			kitInstance.Extensions().EmitModelChange(modelString, prev, "extension")
+			return nil
+		},
+		GetAvailableModels: func() []extensions.ModelInfoEntry {
+			return kitInstance.GetAvailableModels()
+		},
+		EmitCustomEvent: func(name, data string) {
+			kitInstance.Extensions().EmitCustomEvent(name, data)
+		},
+		GetAllTools: func() []extensions.ToolInfo {
+			return kitInstance.Extensions().GetToolInfos()
+		},
+		SetActiveTools: func(names []string) {
+			kitInstance.Extensions().SetActiveTools(names)
+		},
+		// Headless reload; the interactive TUI replaces this with a variant that
+		// additionally refreshes widgets/status/commands.
+		ReloadExtensions: func() error {
+			return kitInstance.Extensions().Reload()
+		},
+
+		// -------------------------------------------------------------------
+		// LLM completions and subagents
+		// -------------------------------------------------------------------
+		// Completions run on context.Background(); ctx is kept for subagent spawns.
+		Complete: func(req extensions.CompleteRequest) (extensions.CompleteResponse, error) {
+			return kitInstance.ExecuteCompletion(context.Background(), req)
+		},
+		SpawnSubagent: func(cfg extensions.SubagentConfig) (*extensions.SubagentHandle, *extensions.SubagentResult, error) {
+			return SpawnSubagent(ctx, kitInstance, cfg)
+		},
+
+		// -------------------------------------------------------------------
+		// Tree Navigation API
+		// -------------------------------------------------------------------
+		GetTreeNode: func(entryID string) *extensions.TreeNode {
+			n := kitInstance.GetTreeNode(entryID)
+			if n == nil {
+				return nil
+			}
+			return &extensions.TreeNode{
+				ID:        n.ID,
+				ParentID:  n.ParentID,
+				Type:      n.Type,
+				Role:      n.Role,
+				Content:   n.Content,
+				Model:     n.Model,
+				Provider:  n.Provider,
+				Timestamp: n.Timestamp,
+				Children:  n.Children,
+			}
+		},
+		GetCurrentBranch: func() []extensions.TreeNode {
+			branch := kitInstance.GetCurrentBranch()
+			out := make([]extensions.TreeNode, 0, len(branch))
+			for _, n := range branch {
+				out = append(out, extensions.TreeNode{
+					ID:        n.ID,
+					ParentID:  n.ParentID,
+					Type:      n.Type,
+					Role:      n.Role,
+					Content:   n.Content,
+					Model:     n.Model,
+					Provider:  n.Provider,
+					Timestamp: n.Timestamp,
+					Children:  n.Children,
+				})
+			}
+			return out
+		},
+		GetChildren: func(parentID string) []string {
+			return kitInstance.GetChildren(parentID)
+		},
+		NavigateTo: func(entryID string) extensions.TreeNavigationResult {
+			if err := kitInstance.NavigateTo(entryID); err != nil {
+				return extensions.TreeNavigationResult{Success: false, Error: err.Error()}
+			}
+			return extensions.TreeNavigationResult{Success: true}
+		},
+		SummarizeBranch: func(fromID, toID string) string {
+			// The field signature has no error slot, so the second value from
+			// kitInstance.SummarizeBranch (presumably an error) is discarded.
+			summary, _ := kitInstance.SummarizeBranch(fromID, toID)
+			return summary
+		},
+		CollapseBranch: func(fromID, toID, summary string) extensions.TreeNavigationResult {
+			if err := kitInstance.CollapseBranch(fromID, toID, summary); err != nil {
+				return extensions.TreeNavigationResult{Success: false, Error: err.Error()}
+			}
+			return extensions.TreeNavigationResult{Success: true}
+		},
+
+		// -------------------------------------------------------------------
+		// Skill Loading API (the context-injection variants are TUI-specific
+		// and are wired by the interactive overlay)
+		// -------------------------------------------------------------------
+		LoadSkill: func(path string) (*extensions.Skill, string) {
+			// Second value is a string (presumably an error message), not an error.
+			return kitInstance.LoadSkillForExtension(path)
+		},
+		LoadSkillsFromDir: func(dir string) extensions.SkillLoadResult {
+			return kitInstance.LoadSkillsFromDirForExtension(dir)
+		},
+		DiscoverSkills: func() extensions.SkillLoadResult {
+			return extensions.SkillLoadResult{Skills: kitInstance.DiscoverSkillsForExtension()}
+		},
+		GetAvailableSkills: func() []extensions.Skill {
+			return kitInstance.DiscoverSkillsForExtension()
+		},
+
+		// -------------------------------------------------------------------
+		// Template Parsing API
+		// -------------------------------------------------------------------
+		ParseTemplate: func(name, content string) extensions.PromptTemplate {
+			return kit.ParseTemplate(name, content)
+		},
+		RenderTemplate: func(tmpl extensions.PromptTemplate, values map[string]string) string {
+			return kit.RenderTemplate(tmpl, values)
+		},
+		ParseArguments: func(input string, pattern extensions.ArgumentPattern) extensions.ParseResult {
+			return kit.ParseArguments(input, pattern)
+		},
+		SimpleParseArguments: func(input string, count int) []string {
+			return kit.SimpleParseArguments(input, count)
+		},
+		EvaluateModelConditional: func(condition string) bool {
+			return kit.EvaluateModelConditional(kitInstance.Extensions().GetContext().Model, condition)
+		},
+		RenderWithModelConditionals: func(content string) string {
+			return kit.RenderWithModelConditionals(content, kitInstance.Extensions().GetContext().Model)
+		},
+
+		// -------------------------------------------------------------------
+		// Model Resolution API
+		// -------------------------------------------------------------------
+		ResolveModelChain: func(preferences []string) extensions.ModelResolutionResult {
+			return kit.ResolveModelChain(preferences)
+		},
+		GetModelCapabilities: func(model string) (extensions.ModelCapabilities, string) {
+			return kit.GetModelCapabilities(model)
+		},
+		CheckModelAvailable: func(model string) bool {
+			return kit.CheckModelAvailable(model)
+		},
+		GetCurrentProvider: func() string {
+			return kit.GetCurrentProvider(kitInstance.Extensions().GetContext().Model)
+		},
+		GetCurrentModelID: func() string {
+			return kit.GetCurrentModelID(kitInstance.Extensions().GetContext().Model)
+		},
+	}
+}
@@ -0,0 +1,98 @@
+// Package extbridge wires the public Kit SDK to the internal extensions
+// package. It exists so that cmd/ and internal/acpserver/ don't both
+// reimplement the same SDK→extension event/subagent conversions.
+package extbridge
+
+import (
+	"context"
+
+	"github.com/mark3labs/kit/internal/extensions"
+	kit "github.com/mark3labs/kit/pkg/kit"
+)
+
+// SDKEventToSubagentEvent translates an SDK [kit.Event] into the
+// [extensions.SubagentEvent] shape that extensions consume. Event kinds with
+// no extension-facing counterpart yield the zero value (Type == ""), which
+// callers are expected to drop.
+func SDKEventToSubagentEvent(e kit.Event) extensions.SubagentEvent {
+	var out extensions.SubagentEvent
+
+	switch src := e.(type) {
+	// Message and reasoning updates carry only a content fragment.
+	case kit.MessageUpdateEvent:
+		out.Type, out.Content = "text", src.Chunk
+	case kit.ReasoningDeltaEvent:
+		out.Type, out.Content = "reasoning", src.Delta
+	// Tool lifecycle events share the call ID, tool name, and tool kind.
+	case kit.ToolCallEvent:
+		out.Type = "tool_call"
+		out.ToolCallID, out.ToolName, out.ToolKind = src.ToolCallID, src.ToolName, src.ToolKind
+		out.ToolArgs = src.ToolArgs
+	case kit.ToolExecutionStartEvent:
+		out.Type = "tool_execution_start"
+		out.ToolCallID, out.ToolName, out.ToolKind = src.ToolCallID, src.ToolName, src.ToolKind
+	case kit.ToolExecutionEndEvent:
+		out.Type = "tool_execution_end"
+		out.ToolCallID, out.ToolName, out.ToolKind = src.ToolCallID, src.ToolName, src.ToolKind
+	case kit.ToolResultEvent:
+		out.Type = "tool_result"
+		out.ToolCallID, out.ToolName, out.ToolKind = src.ToolCallID, src.ToolName, src.ToolKind
+		out.ToolResult, out.IsError = src.Result, src.IsError
+	// Turn boundaries carry no payload beyond their type.
+	case kit.TurnStartEvent:
+		out.Type = "turn_start"
+	case kit.TurnEndEvent:
+		out.Type = "turn_end"
+	default:
+		// Unmapped event kind: out stays the zero value.
+	}
+
+	return out
+}
+
+// SpawnSubagent runs a subagent in-process via the Kit SDK and translates
+// the result/events back into the extension-facing types. The returned
+// handle is always nil — the SDK path runs synchronously and does not
+// expose a separate process handle. Callers that need non-blocking
+// behaviour should run this in their own goroutine.
+//
+// This function consolidates the previously-duplicated wiring in
+// cmd/root.go (interactive + runtime contexts) and
+// internal/acpserver/session.go.
+func SpawnSubagent(ctx context.Context, k *kit.Kit, cfg extensions.SubagentConfig) (*extensions.SubagentHandle, *extensions.SubagentResult, error) {
+	sdkCfg := kit.SubagentConfig{
+		Prompt:       cfg.Prompt,
+		Model:        cfg.Model,
+		SystemPrompt: cfg.SystemPrompt,
+		Timeout:      cfg.Timeout,
+		NoSession:    cfg.NoSession,
+		Tools:        k.GetToolsForSubagent(),
+	}
+	if cfg.OnEvent != nil {
+		sdkCfg.OnEvent = func(e kit.Event) {
+			se := SDKEventToSubagentEvent(e)
+			if se.Type != "" {
+				cfg.OnEvent(se)
+			}
+		}
+	}
+
+	result, err := k.Subagent(ctx, sdkCfg)
+	if result == nil {
+		return nil, &extensions.SubagentResult{Error: err}, err
+	}
+
+	extResult := &extensions.SubagentResult{
+		Response:  result.Response,
+		Error:     err,
+		SessionID: result.SessionID,
+		Elapsed:   result.Elapsed,
+	}
+	if result.Usage != nil {
+		extResult.Usage = &extensions.SubagentUsage{
+			InputTokens:  result.Usage.InputTokens,
+			OutputTokens: result.Usage.OutputTokens,
+		}
+	}
+	return nil, extResult, err
+}
@@ -1,5 +1,24 @@
 package extensions

+import (
+	"errors"
+)
+
+// ErrAgentBusy is returned (wrapped) when an extension API call that requires
+// the agent to be idle cannot proceed because the agent is still processing a
+// turn or post-turn hooks. Most notably, ctx.NewSession waits for idle
+// internally; if its wait deadline elapses it returns an error that wraps
+// this sentinel.
+//
+// Extensions can detect the condition with errors.Is:
+//
+//	if err := ctx.NewSession(prompt); err != nil {
+//	    if errors.Is(err, ext.ErrAgentBusy) {
+//	        // agent never settled — fall back to a queued message instead
+//	    }
+//	}
+var ErrAgentBusy = errors.New("agent is busy")
+
 // ---------------------------------------------------------------------------
 // Internal types (used by runner, NOT exposed to Yaegi)
 // ---------------------------------------------------------------------------
@@ -124,6 +143,48 @@ type Context struct {
 	//   })
 	SendMultimodalMessage func(text string, files []FilePart)

+	// NewSession ends the current session and starts a fresh one (matching
+	// the /new slash command). When prompt is non-empty it is submitted as
+	// the first user turn of the new session, with @file references
+	// expanded the same way they are for normal user input. Pass an empty
+	// string to start an empty session.
+	//
+	// If the agent is currently busy when NewSession is called (for example,
+	// from an OnAgentEnd hook that fires before the agent fully settles, or
+	// while post-turn formatters/linters are still running), the call blocks
+	// until the agent transitions to idle. This avoids the v0.79.0
+	// phase-handoff race where NewSession from OnAgentEnd would fail with
+	// "agent is busy" because TurnEnd fires before the busy flag clears.
+	// The wait has a generous internal timeout; if it elapses the returned
+	// error wraps ErrAgentBusy (detectable with errors.Is).
+	//
+	// Returns an error if the agent does not become idle within the wait
+	// window, if a registered BeforeSessionSwitch handler cancels the
+	// switch, or if the new session file cannot be created. In
+	// non-interactive (ACP / headless) mode this is a no-op that returns
+	// an error.
+	//
+	// Because NewSession may block, call it from a goroutine — not
+	// directly from inside an event handler that the agent loop is waiting
+	// on.
+	//
+	// Typical pattern — start a fresh session at the end of a phase by
+	// reading a handoff file:
+	//
+	//   api.OnAgentEnd(func(e ext.AgentEndEvent, ctx ext.Context) {
+	//       msgs := ctx.GetMessages()
+	//       if len(msgs) == 0 {
+	//           return
+	//       }
+	//       last := msgs[len(msgs)-1].Content
+	//       if strings.Contains(last, "<HANDOFF_READY>") {
+	//           go func() {
+	//               _ = ctx.NewSession("Read @HANDOFF.md and continue the next phase.")
+	//           }()
+	//       }
+	//   })
+	NewSession func(prompt string) error
+
 	// GetSessionUsage returns aggregated token usage and cost statistics
 	// for the current session. This includes total input/output tokens,
 	// cache read/write tokens, total cost, and request count.
@@ -341,6 +402,13 @@ type Context struct {
 	// The data survives across session restarts and can be retrieved via
 	// GetEntries. Use entryType to namespace your data (e.g. "myext:state").
 	//
+	// AppendEntry is append-only and lives in the conversation tree, which
+	// makes it the right tool for audit logs and event histories. For
+	// last-write-wins snapshot state — "what's the current value of X?" —
+	// prefer SetState / GetState instead. Those primitives store data in a
+	// sidecar file outside the conversation tree, are O(1) to read/write,
+	// and do not bloat branch reads or duplicate on fork.
+	//
 	// Example:
 	//
 	//   data, _ := json.Marshal(myState)
@@ -360,6 +428,45 @@ type Context struct {
 	//   }
 	GetEntries func(entryType string) []ExtensionEntry

+	// SetState stores a key-value pair in session-scoped, last-write-wins
+	// extension state. Unlike AppendEntry the value is kept in a sidecar
+	// file outside the conversation tree, so:
+	//   - reads are O(1) (no branch walk)
+	//   - writes don't bloat the session JSONL
+	//   - state is not duplicated on fork (branches share the sidecar)
+	//   - state is invisible to the LLM
+	//
+	// Use SetState for snapshot state ("current value of X"); use
+	// AppendEntry for audit logs and event histories. Namespace keys with
+	// your extension name to avoid collisions (e.g. "myext:budget-cap").
+	//
+	// State persists for the lifetime of the session. For ephemeral or
+	// in-memory sessions the state lives only in memory.
+	//
+	// Example:
+	//
+	//   ctx.SetState("myext:budget-cap", "10.00")
+	SetState func(key string, value string)
+
+	// GetState returns the value previously stored via SetState. The bool
+	// is false when the key was never written. Returns ("", false) when
+	// state is unavailable.
+	//
+	// Example:
+	//
+	//   if cap, ok := ctx.GetState("myext:budget-cap"); ok {
+	//       fmt.Println("current cap:", cap)
+	//   }
+	GetState func(key string) (string, bool)
+
+	// DeleteState removes a key from session-scoped extension state.
+	// No-op when the key is missing.
+	DeleteState func(key string)
+
+	// ListState returns all keys currently stored in session-scoped
+	// extension state, in unspecified order.
+	ListState func() []string
+
 	// SetEditorText sets the text content of the input editor. This can
 	// be used to pre-fill the editor with suggested text (e.g. extracted
 	// questions, handoff prompts). The cursor is moved to the end.
@@ -670,7 +777,8 @@ type Context struct {
 	LoadSkillsFromDir func(dir string) SkillLoadResult

 	// DiscoverSkills finds skills in standard locations.
-	// Checks ~/.config/kit/skills/, .kit/skills/, .agents/skills/
+	// Checks ~/.agents/skills/, ~/.config/kit/skills/, <project>/.agents/skills/,
+	// and <project>/.kit/skills/.
 	DiscoverSkills func() SkillLoadResult

 	// InjectSkillAsContext sends a skill's content as a system message.
@@ -802,9 +910,24 @@ type Skill struct {
 	Content string
 	// Path is the absolute filesystem path.
 	Path string
-	// Tags are optional labels for categorization.
+	// License is an optional SPDX license identifier (agentskills.io field).
+	License string
+	// Compatibility is an optional note describing targeted environments
+	// (agentskills.io field).
+	Compatibility string
+	// Metadata is an optional bag of arbitrary string key/value pairs
+	// (agentskills.io field).
+	Metadata map[string]string
+	// AllowedTools optionally restricts which tools the skill may use
+	// (experimental agentskills.io field).
+	AllowedTools string
+	// DisableModelInvocation hides the skill from the model-facing catalog
+	// while keeping it available via explicit activation (agentskills.io field).
+	DisableModelInvocation bool
+	// Tags are optional labels for categorization. Kit extension.
 	Tags []string
 	// When controls automatic inclusion: "always", "on-demand", or file-glob.
+	// Kit extension.
 	When string
 }

@@ -918,7 +1041,7 @@ type ExtensionEntry struct {
 type ContextMessage struct {
 	// Index is the position of this message in the original context array
 	// (0-based). When returning messages from a ContextPrepareResult,
-	// messages with Index >= 0 reuse the original fantasy.Message at that
+	// messages with Index >= 0 reuse the original LLM message at that
 	// position (preserving tool calls, reasoning, and other complex parts).
 	// Set Index to -1 for newly injected messages (created from Role + Content).
 	Index int
@@ -1063,6 +1186,9 @@ type PrintBlockOpts struct {
 type API struct {
 	// Event-specific registration functions (wired by the loader).
 	onToolCall                func(func(ToolCallEvent, Context) *ToolCallResult)
+	onToolCallInputStart      func(func(ToolCallInputStartEvent, Context))
+	onToolCallInputDelta      func(func(ToolCallInputDeltaEvent, Context))
+	onToolCallInputEnd        func(func(ToolCallInputEndEvent, Context))
 	onToolExecStart           func(func(ToolExecutionStartEvent, Context))
 	onToolExecEnd             func(func(ToolExecutionEndEvent, Context))
 	onToolOutput              func(func(ToolOutputEvent, Context))
@@ -1091,6 +1217,15 @@ type API struct {
 	onSubagentStart           func(func(SubagentStartEvent, Context))
 	onSubagentChunk           func(func(SubagentChunkEvent, Context))
 	onSubagentEnd             func(func(SubagentEndEvent, Context))
+	onStepStart               func(func(StepStartEvent, Context))
+	onStepFinish              func(func(StepFinishEvent, Context))
+	onReasoningStart          func(func(ReasoningStartEvent, Context))
+	onWarnings                func(func(WarningsEvent, Context))
+	onSource                  func(func(SourceEvent, Context))
+	onError                   func(func(ErrorEvent, Context))
+	onRetry                   func(func(RetryEvent, Context))
+	onPrepareStep             func(func(PrepareStepEvent, Context) *PrepareStepResult)
+	onLLMUsage                func(func(LLMUsageEvent, Context))
 }

 // OnToolCall registers a handler that fires before a tool executes.
@@ -1099,6 +1234,26 @@ func (a *API) OnToolCall(handler func(ToolCallEvent, Context) *ToolCallResult) {
 	a.onToolCall(handler)
 }

+// OnToolCallInputStart registers a handler that fires when the LLM begins
+// generating tool call arguments: the tool name is known, but the argument
+// JSON is still streaming. Use it to show a "running" indicator right away.
+// Registration is forwarded to the loader-wired onToolCallInputStart hook.
+func (a *API) OnToolCallInputStart(handler func(ToolCallInputStartEvent, Context)) {
+	a.onToolCallInputStart(handler)
+}
+
+// OnToolCallInputDelta registers a handler (via the loader-wired hook) that
+// fires for each streamed fragment of tool call arguments from the LLM.
+func (a *API) OnToolCallInputDelta(handler func(ToolCallInputDeltaEvent, Context)) {
+	a.onToolCallInputDelta(handler)
+}
+
+// OnToolCallInputEnd registers a handler (via the loader-wired hook) that fires
+// once argument streaming completes, before the call is parsed and executed.
+func (a *API) OnToolCallInputEnd(handler func(ToolCallInputEndEvent, Context)) {
+	a.onToolCallInputEnd(handler)
+}
+
 // OnToolExecutionStart registers a handler for tool execution start.
 func (a *API) OnToolExecutionStart(handler func(ToolExecutionStartEvent, Context)) {
 	a.onToolExecStart(handler)
@@ -1278,6 +1433,69 @@ func (a *API) OnBeforeCompact(handler func(BeforeCompactEvent, Context) *BeforeC
 	a.onBeforeCompact(handler)
 }

+// OnStepStart registers a handler that fires when a new LLM call begins
+// within a multi-step agent turn.
+func (a *API) OnStepStart(handler func(StepStartEvent, Context)) {
+	a.onStepStart(handler)
+}
+
+// OnStepFinish registers a handler that fires when a step completes,
+// providing step number, finish reason, and decomposed token usage.
+func (a *API) OnStepFinish(handler func(StepFinishEvent, Context)) {
+	a.onStepFinish(handler)
+}
+
+// OnReasoningStart registers a handler that fires when the LLM begins
+// reasoning/thinking.
+func (a *API) OnReasoningStart(handler func(ReasoningStartEvent, Context)) {
+	a.onReasoningStart(handler)
+}
+
+// OnWarnings registers a handler that fires when the LLM provider returns
+// warnings about the request.
+func (a *API) OnWarnings(handler func(WarningsEvent, Context)) {
+	a.onWarnings(handler)
+}
+
+// OnSource registers a handler that fires when the LLM references a source
+// (e.g. from web search tools).
+func (a *API) OnSource(handler func(SourceEvent, Context)) {
+	a.onSource(handler)
+}
+
+// OnError registers a handler that fires when an agent-level error occurs
+// during streaming.
+func (a *API) OnError(handler func(ErrorEvent, Context)) {
+	a.onError(handler)
+}
+
+// OnRetry registers a handler that fires when the LLM provider request is
+// retried after a transient error.
+func (a *API) OnRetry(handler func(RetryEvent, Context)) {
+	a.onRetry(handler)
+}
+
+// OnPrepareStep registers a handler that fires between steps within a
+// multi-step agent turn, after steering messages are injected and before
+// messages are sent to the LLM. Return a non-nil PrepareStepResult with
+// Messages to replace the context window for this step.
+func (a *API) OnPrepareStep(handler func(PrepareStepEvent, Context) *PrepareStepResult) {
+	a.onPrepareStep(handler)
+}
+
+// OnLLMUsage registers a handler that fires after each LLM provider call
+// with the token and cost deltas for that single call. Use this for
+// per-call usage attribution, real-time budget enforcement, and cost
+// dashboards that need to react between calls within a single agent turn.
+//
+// Handlers receive an LLMUsageEvent describing the call's input/output
+// tokens, cache tokens, computed cost, model, and provider. A single agent
+// turn typically fires multiple LLMUsageEvents (one per tool-loop
+// iteration).
+func (a *API) OnLLMUsage(handler func(LLMUsageEvent, Context)) {
+	a.onLLMUsage(handler)
+}
+
 // RegisterToolRenderer registers a custom renderer for a specific tool's
 // display in the TUI. The renderer controls the header (parameter summary)
 // and/or body (result display) of the tool's output block. If multiple
@@ -1890,6 +2108,34 @@ type ToolCallResult struct {

 func (ToolCallResult) isResult() {}

+// ToolCallInputStartEvent fires when the LLM begins generating tool call
+// arguments. The tool name is known but the full argument JSON is still
+// being streamed.
+type ToolCallInputStartEvent struct {
+	ToolCallID string
+	ToolName   string
+	ToolKind   string // Tool classification: "execute", "edit", "read", "search", "agent"
+}
+
+func (e ToolCallInputStartEvent) Type() EventType { return ToolCallInputStart }
+
+// ToolCallInputDeltaEvent fires for each streamed fragment of tool call
+// arguments as they arrive from the LLM.
+type ToolCallInputDeltaEvent struct {
+	ToolCallID string
+	Delta      string // JSON fragment of tool arguments
+}
+
+func (e ToolCallInputDeltaEvent) Type() EventType { return ToolCallInputDelta }
+
+// ToolCallInputEndEvent fires when tool argument streaming is complete,
+// before the tool call is parsed and execution begins.
+type ToolCallInputEndEvent struct {
+	ToolCallID string
+}
+
+func (e ToolCallInputEndEvent) Type() EventType { return ToolCallInputEnd }
+
 // ToolExecutionStartEvent fires when a tool begins executing.
 type ToolExecutionStartEvent struct {
 	ToolCallID string
@@ -1982,10 +2228,47 @@ type AgentStartEvent struct {

 func (e AgentStartEvent) Type() EventType { return AgentStart }

-// AgentEndEvent fires when the agent finishes responding.
+// AgentEndEvent fires when the agent finishes responding. In addition to the
+// final response and stop reason, the event carries per-turn aggregates so
+// observer-style extensions don't have to maintain parallel bookkeeping in
+// OnToolResult / OnStepFinish handlers.
 type AgentEndEvent struct {
 	Response   string
 	StopReason string // "completed", "cancelled", "error"
+
+	// ToolCallCount is the total number of tool invocations observed during
+	// this turn (sum across all steps).
+	ToolCallCount int
+
+	// ToolNames lists the tool names invoked during this turn, in call order.
+	// Duplicates are preserved (e.g. two bash calls produce ["bash", "bash"]).
+	ToolNames []string
+
+	// LLMCallCount is the number of LLM round-trips (tool-loop iterations)
+	// performed during this turn. Always >= 1 for a successful turn.
+	LLMCallCount int
+
+	// InputTokensDelta is the sum of input tokens consumed during this turn
+	// across every LLM call (including cache-hit input tokens).
+	InputTokensDelta int
+
+	// OutputTokensDelta is the sum of output tokens generated during this turn.
+	OutputTokensDelta int
+
+	// CacheReadTokensDelta is the sum of cache-read tokens during this turn.
+	CacheReadTokensDelta int
+
+	// CacheWriteTokensDelta is the sum of cache-write tokens during this turn.
+	CacheWriteTokensDelta int
+
+	// CostDelta is the total cost in USD attributable to this turn. Computed
+	// from per-step usage and current model pricing. Zero when pricing is
+	// unknown or OAuth credentials are in use.
+	CostDelta float64
+
+	// DurationMs is the elapsed wall-clock time from AgentStart to AgentEnd,
+	// in milliseconds.
+	DurationMs int64
 }

 func (e AgentEndEvent) Type() EventType { return AgentEnd }
@@ -2090,6 +2373,12 @@ type BeforeSessionSwitchEvent struct {
 	// Reason describes why the switch is happening: "new" for /new command,
 	// "clear" for /clear command.
 	Reason string
+	// InitialPrompt, when non-empty, is the prompt that will be submitted
+	// as the first user turn of the new session. Set when /new is invoked
+	// with an argument (e.g. "/new continue from HANDOFF.md") or when an
+	// extension calls ctx.NewSession(prompt). Extensions may inspect this
+	// to decide whether to allow the switch.
+	InitialPrompt string
 }

 func (e BeforeSessionSwitchEvent) Type() EventType { return BeforeSessionSwitch }
@@ -2202,6 +2491,135 @@ type SubagentEndEvent struct {

 func (e SubagentEndEvent) Type() EventType { return SubagentEnd }

+// ---------------------------------------------------------------------------
+// Step lifecycle events (exposed to Yaegi — concrete structs)
+// ---------------------------------------------------------------------------
+
+// StepStartEvent fires when a new LLM call begins within a multi-step agent turn.
+type StepStartEvent struct {
+	StepNumber int
+}
+
+func (e StepStartEvent) Type() EventType { return StepStart }
+
+// StepFinishEvent fires when a step completes, providing step metadata and
+// token usage. Usage fields are plain int64 (not LLMUsage) because Yaegi
+// cannot handle fantasy types across the interpreter boundary.
+type StepFinishEvent struct {
+	StepNumber       int
+	HasToolCalls     bool
+	FinishReason     string
+	InputTokens      int64
+	OutputTokens     int64
+	CacheReadTokens  int64
+	CacheWriteTokens int64
+}
+
+func (e StepFinishEvent) Type() EventType { return StepFinish }
+
+// ReasoningStartEvent fires when the LLM begins reasoning/thinking.
+type ReasoningStartEvent struct {
+	ID string
+}
+
+func (e ReasoningStartEvent) Type() EventType { return ReasoningStart }
+
+// WarningsEvent fires when the LLM provider returns warnings about the request.
+type WarningsEvent struct {
+	Warnings []string
+}
+
+func (e WarningsEvent) Type() EventType { return Warnings }
+
+// SourceEvent fires when the LLM references a source (e.g. from web search).
+type SourceEvent struct {
+	SourceType string
+	ID         string
+	URL        string
+	Title      string
+}
+
+func (e SourceEvent) Type() EventType { return Source }
+
+// ErrorEvent fires when an agent-level error occurs during streaming.
+// Uses string instead of error because Yaegi cannot handle the error
+// interface reliably across the interpreter boundary.
+type ErrorEvent struct {
+	Error string
+}
+
+func (e ErrorEvent) Type() EventType { return Error }
+
+// RetryEvent fires when the LLM provider request is retried after a
+// transient error.
+type RetryEvent struct {
+	Attempt int
+	Error   string
+}
+
+func (e RetryEvent) Type() EventType { return Retry }
+
+// PrepareStepEvent fires between steps within a multi-step agent turn,
+// after steering messages are injected and before messages are sent to
+// the LLM. Handlers can inspect and replace the context window.
+type PrepareStepEvent struct {
+	// StepNumber is the zero-based step index within the current turn.
+	StepNumber int
+	// Messages is the current context window that will be sent to the LLM.
+	Messages []ContextMessage
+}
+
+func (e PrepareStepEvent) Type() EventType { return PrepareStep }
+
+// PrepareStepResult allows extensions to replace the context window between
+// steps. Return nil Messages to leave the context unchanged.
+type PrepareStepResult struct {
+	// Messages replaces the entire context window for this step. If nil,
+	// the original messages are used unchanged. Messages with a non-negative
+	// Index reuse the original message at that position; messages with
+	// Index < 0 are created fresh from Role + Content.
+	Messages []ContextMessage
+}
+
+func (PrepareStepResult) isResult() {}
+
+// LLMUsageEvent fires after each LLM provider call with the per-call token
+// and cost deltas. Use this for accurate budget tracking, cost dashboards,
+// and any logic that needs to react between LLM calls within a single agent
+// turn (rather than only at turn boundaries).
+//
+// A single agent turn typically produces multiple LLMUsageEvents (one per
+// tool-loop iteration). The Model and Provider fields reflect the model used
+// for that specific call, which may differ from earlier calls if the
+// extension switched models mid-turn via ctx.SetModel().
+type LLMUsageEvent struct {
+	// InputTokens is the number of input tokens for this call.
+	InputTokens int
+	// OutputTokens is the number of output tokens generated by this call.
+	OutputTokens int
+	// CacheReadTokens is the number of cache-hit input tokens (provider-specific).
+	CacheReadTokens int
+	// CacheWriteTokens is the number of cache-write tokens.
+	CacheWriteTokens int
+	// Cost is the USD cost of this call computed from the model's per-token
+	// pricing. Zero when pricing is unknown or OAuth credentials are in use.
+	Cost float64
+	// Model is the model identifier used for this call (e.g. "claude-sonnet-4-5-20250929").
+	Model string
+	// Provider is the provider identifier (e.g. "anthropic", "openai").
+	Provider string
+	// RequestID is an optional correlation id for the underlying provider
+	// call. May be empty when the provider does not surface one.
+	RequestID string
+	// StepNumber is the zero-based step index within the current agent turn.
+	StepNumber int
+	// FinishReason mirrors the provider's finish reason for this call
+	// (e.g. "stop", "tool_calls", "length"). May be empty.
+	FinishReason string
+}
+
+func (e LLMUsageEvent) Type() EventType { return LLMUsage }
+
 // ThemeColor is an adaptive color pair with light and dark hex values.
 // Either field may be empty to inherit from the default theme.
 type ThemeColor struct {
@@ -13,6 +13,19 @@ const (
 	// ToolCall fires before a tool executes. Handlers can block execution.
 	ToolCall EventType = "tool_call"

+	// ToolCallInputStart fires when the LLM begins generating tool call
+	// arguments. The tool name is known but the full argument JSON is still
+	// being streamed.
+	ToolCallInputStart EventType = "tool_call_input_start"
+
+	// ToolCallInputDelta fires for each streamed fragment of tool call
+	// arguments as they arrive from the LLM.
+	ToolCallInputDelta EventType = "tool_call_input_delta"
+
+	// ToolCallInputEnd fires when tool argument streaming is complete,
+	// before the tool call is parsed and execution begins.
+	ToolCallInputEnd EventType = "tool_call_input_end"
+
 	// ToolExecutionStart fires when a tool begins executing.
 	ToolExecutionStart EventType = "tool_execution_start"

@@ -83,18 +96,55 @@ const (
 	// SubagentEnd fires when a subagent tool call completes (success
 	// or error). Carries the final response and any error message.
 	SubagentEnd EventType = "subagent_end"
+
+	// StepStart fires when a new LLM call begins within a multi-step
+	// agent turn.
+	StepStart EventType = "step_start"
+
+	// StepFinish fires when a step completes, providing step number,
+	// finish reason, and token usage.
+	StepFinish EventType = "step_finish"
+
+	// ReasoningStart fires when the LLM begins reasoning/thinking.
+	ReasoningStart EventType = "reasoning_start"
+
+	// Warnings fires when the LLM provider returns warnings.
+	Warnings EventType = "warnings"
+
+	// Source fires when the LLM references a source (e.g. web search).
+	Source EventType = "source"
+
+	// Error fires when an agent-level error occurs during streaming.
+	Error EventType = "error"
+
+	// Retry fires when the LLM provider request is retried after a
+	// transient error.
+	Retry EventType = "retry"
+
+	// PrepareStep fires between steps within a multi-step agent turn,
+	// after steering messages are injected and before messages are sent
+	// to the LLM. Handlers can replace the context window for this step.
+	PrepareStep EventType = "prepare_step"
+
+	// LLMUsage fires after each LLM provider call with the token and cost
+	// deltas for that single call. Extensions use it to attribute usage to
+	// specific calls/models and to drive budget enforcement between calls.
+	LLMUsage EventType = "llm_usage"
 )

 // AllEventTypes returns every supported event type.
 func AllEventTypes() []EventType {
 	return []EventType{
-		ToolCall, ToolExecutionStart, ToolExecutionEnd, ToolResult,
+		ToolCall, ToolCallInputStart, ToolCallInputDelta, ToolCallInputEnd,
+		ToolExecutionStart, ToolExecutionEnd, ToolResult,
 		Input, BeforeAgentStart, AgentStart, AgentEnd,
 		MessageStart, MessageUpdate, MessageEnd,
 		SessionStart, SessionShutdown,
 		ModelChange, ContextPrepare,
 		BeforeFork, BeforeSessionSwitch, BeforeCompact,
 		SubagentStart, SubagentChunk, SubagentEnd,
+		StepStart, StepFinish, ReasoningStart, Warnings, Source, Error, Retry,
+		PrepareStep, LLMUsage,
 	}
 }

@@ -4,8 +4,8 @@ import "testing"

 func TestAllEventTypes_Count(t *testing.T) {
 	all := AllEventTypes()
-	if len(all) != 21 {
-		t.Fatalf("expected 21 event types, got %d", len(all))
+	if len(all) != 33 {
+		t.Fatalf("expected 33 event types, got %d", len(all))
 	}
 }

@@ -38,6 +38,9 @@ func TestEventType_TypeMethod(t *testing.T) {
 		want  EventType
 	}{
 		{ToolCallEvent{ToolName: "test"}, ToolCall},
+		{ToolCallInputStartEvent{ToolCallID: "x", ToolName: "test"}, ToolCallInputStart},
+		{ToolCallInputDeltaEvent{ToolCallID: "x", Delta: "{"}, ToolCallInputDelta},
+		{ToolCallInputEndEvent{ToolCallID: "x"}, ToolCallInputEnd},
 		{ToolExecutionStartEvent{ToolName: "test"}, ToolExecutionStart},
 		{ToolExecutionEndEvent{ToolName: "test"}, ToolExecutionEnd},
 		{ToolResultEvent{ToolName: "test"}, ToolResult},
@@ -450,25 +450,6 @@ func globalGitInstallRoot() string {
 	return filepath.Join(base, "kit", "git")
 }

-// GetInstalledPackages returns all installed packages from both scopes.
-func (i *Installer) GetInstalledPackages() ([]ManifestEntry, error) {
-	var all []ManifestEntry
-
-	global, err := i.loadManifest(ScopeGlobal)
-	if err != nil {
-		return nil, fmt.Errorf("loading global manifest: %w", err)
-	}
-	all = append(all, global.Packages...)
-
-	project, err := i.loadManifest(ScopeProject)
-	if err != nil {
-		return nil, fmt.Errorf("loading project manifest: %w", err)
-	}
-	all = append(all, project.Packages...)
-
-	return all, nil
-}
-
 // IsInstalled checks if a package is installed in either scope.
 // Returns (scope, true) if installed, ("", false) otherwise.
 func (i *Installer) IsInstalled(source *GitSource) (InstallScope, bool) {
@@ -245,14 +245,21 @@ func TestManifestEntryIdentity(t *testing.T) {
 	}
 }

+// TestLoadAndSaveManifest exercises the live *Installer.loadManifest /
+// saveManifest round-trip against a temp directory, ensuring an absent
+// manifest loads as empty and a saved manifest reads back identically.
 func TestLoadAndSaveManifest(t *testing.T) {
 	tempDir := t.TempDir()
+	installer := &Installer{
+		projectGitRoot: tempDir,
+		globalGitRoot:  tempDir,
+	}
 	manifestPath := filepath.Join(tempDir, "packages.json")

 	// Test loading non-existent manifest
-	manifest, err := loadManifestFromPath(manifestPath)
+	manifest, err := installer.loadManifest(ScopeGlobal)
 	if err != nil {
-		t.Fatalf("loadManifestFromPath() error = %v", err)
+		t.Fatalf("loadManifest() error = %v", err)
 	}
 	if len(manifest.Packages) != 0 {
 		t.Errorf("Expected empty packages, got %d", len(manifest.Packages))
@@ -273,15 +280,20 @@ func TestLoadAndSaveManifest(t *testing.T) {
 	}

 	// Save it
-	err = saveManifestToPath(manifest, manifestPath)
+	err = installer.saveManifest(manifest, ScopeGlobal)
 	if err != nil {
-		t.Fatalf("saveManifestToPath() error = %v", err)
+		t.Fatalf("saveManifest() error = %v", err)
+	}
+
+	// Verify it was written to expected path
+	if _, err := os.Stat(manifestPath); err != nil {
+		t.Fatalf("manifest file not created: %v", err)
 	}

 	// Load it back
-	loaded, err := loadManifestFromPath(manifestPath)
+	loaded, err := installer.loadManifest(ScopeGlobal)
 	if err != nil {
-		t.Fatalf("loadManifestFromPath() error = %v", err)
+		t.Fatalf("loadManifest() error = %v", err)
 	}
 	if len(loaded.Packages) != 1 {
 		t.Errorf("Expected 1 package, got %d", len(loaded.Packages))
@@ -291,21 +303,15 @@ func TestLoadAndSaveManifest(t *testing.T) {
 	}
 }

+// TestAddAndRemoveFromManifest verifies that *Installer.addToManifest
+// followed by removeFromManifest leaves the manifest in its original
+// (empty) state, using a temp-directory installer scope.
 func TestAddAndRemoveFromManifest(t *testing.T) {
 	tempDir := t.TempDir()
-
-	// Set up environment for manifest path
-	if err := os.Setenv("XDG_DATA_HOME", tempDir); err != nil {
-		t.Fatalf("Setenv() error = %v", err)
+	installer := &Installer{
+		projectGitRoot: tempDir,
+		globalGitRoot:  tempDir,
 	}
-	defer func() {
-		if err := os.Unsetenv("XDG_DATA_HOME"); err != nil {
-			t.Logf("Unsetenv() error = %v", err)
-		}
-	}()
-
-	// The manifest path when XDG_DATA_HOME is set
-	manifestPath := filepath.Join(tempDir, "kit", "git", "packages.json")

 	// Add an entry
 	entry := ManifestEntry{
@@ -315,58 +321,51 @@ func TestAddAndRemoveFromManifest(t *testing.T) {
 		Scope:  ScopeGlobal,
 	}

-	err := addEntryToManifest(entry, ScopeGlobal)
-	if err != nil {
-		t.Fatalf("addEntryToManifest() error = %v", err)
+	if err := installer.addToManifest(entry, ScopeGlobal); err != nil {
+		t.Fatalf("addToManifest() error = %v", err)
 	}

 	// Verify it was added
-	manifest, err := loadManifestFromPath(manifestPath)
+	manifest, err := installer.loadManifest(ScopeGlobal)
 	if err != nil {
-		t.Fatalf("loadManifestFromPath() error = %v", err)
+		t.Fatalf("loadManifest() error = %v", err)
 	}
 	if len(manifest.Packages) != 1 {
 		t.Errorf("Expected 1 package, got %d", len(manifest.Packages))
 	}

 	// Remove it
-	err = removeEntryFromManifest("github.com/user/repo", ScopeGlobal)
-	if err != nil {
-		t.Fatalf("removeEntryFromManifest() error = %v", err)
+	if err := installer.removeFromManifest("github.com/user/repo", ScopeGlobal); err != nil {
+		t.Fatalf("removeFromManifest() error = %v", err)
 	}

 	// Verify it was removed
-	manifest, err = loadManifestFromPath(manifestPath)
+	manifest, err = installer.loadManifest(ScopeGlobal)
 	if err != nil {
-		t.Fatalf("loadManifestFromPath() error = %v", err)
+		t.Fatalf("loadManifest() error = %v", err)
 	}
 	if len(manifest.Packages) != 0 {
 		t.Errorf("Expected 0 packages, got %d", len(manifest.Packages))
 	}
 }

+// TestFindInManifest writes a manifest file directly to the path
+// resolved by the package-level manifestPathForScope helper and then
+// confirms FindInManifest locates the entry by identity (and returns
+// nil for a non-existent identity).
 func TestFindInManifest(t *testing.T) {
 	tempDir := t.TempDir()
-	if err := os.Setenv("XDG_DATA_HOME", tempDir); err != nil {
-		t.Fatalf("Setenv() error = %v", err)
-	}
-	defer func() {
-		if err := os.Unsetenv("XDG_DATA_HOME"); err != nil {
-			t.Logf("Unsetenv() error = %v", err)
-		}
-	}()
+	t.Setenv("XDG_DATA_HOME", tempDir)

-	// Add an entry to global manifest
-	entry := ManifestEntry{
-		Source: "git:github.com/user/repo",
-		Host:   "github.com",
-		Path:   "user/repo",
-		Scope:  ScopeGlobal,
+	// Write a manifest entry directly via the package-level path resolver
+	// so FindInManifest (which uses manifestPathForScope) can read it back.
+	manifestPath := manifestPathForScope(ScopeGlobal)
+	if err := os.MkdirAll(filepath.Dir(manifestPath), 0755); err != nil {
+		t.Fatalf("MkdirAll() error = %v", err)
 	}
-
-	err := addEntryToManifest(entry, ScopeGlobal)
-	if err != nil {
-		t.Fatalf("addEntryToManifest() error = %v", err)
+	data := []byte(`{"packages":[{"source":"git:github.com/user/repo","repo":"","host":"github.com","path":"user/repo","pinned":false,"scope":"global","installed":"0001-01-01T00:00:00Z"}]}`)
+	if err := os.WriteFile(manifestPath, data, 0644); err != nil {
+		t.Fatalf("WriteFile() error = %v", err)
 	}

 	// Find it
@@ -0,0 +1,119 @@
+package extensions
+
+import "testing"
+
+func TestRunner_EmitLLMUsage(t *testing.T) {
+	var got LLMUsageEvent
+	var called bool
+	ext := makeHandlerExt("llmusage.go", map[EventType][]HandlerFunc{
+		LLMUsage: {
+			func(e Event, c Context) Result {
+				got = e.(LLMUsageEvent)
+				called = true
+				return nil
+			},
+		},
+	})
+
+	r := makeRunner(ext)
+	_, err := r.Emit(LLMUsageEvent{
+		InputTokens:  100,
+		OutputTokens: 50,
+		Cost:         0.0012,
+		Model:        "claude-sonnet-4-5-20250929",
+		Provider:     "anthropic",
+		StepNumber:   2,
+		FinishReason: "tool_calls",
+	})
+	if err != nil {
+		t.Fatalf("emit: %v", err)
+	}
+	if !called {
+		t.Fatal("expected LLMUsage handler to be called")
+	}
+	if got.InputTokens != 100 || got.OutputTokens != 50 {
+		t.Errorf("token fields not propagated: %+v", got)
+	}
+	if got.Cost != 0.0012 {
+		t.Errorf("cost not propagated, got %v", got.Cost)
+	}
+	if got.Model != "claude-sonnet-4-5-20250929" || got.Provider != "anthropic" {
+		t.Errorf("model/provider not propagated: %+v", got)
+	}
+	if got.StepNumber != 2 || got.FinishReason != "tool_calls" {
+		t.Errorf("step/finish reason not propagated: %+v", got)
+	}
+}
+
+func TestRunner_LLMUsageRegisteredViaTestAPI(t *testing.T) {
+	// Verify NewTestAPI wires up onLLMUsage so the extension can call
+	// api.OnLLMUsage during Init.
+	ext := &LoadedExtension{Handlers: make(map[EventType][]HandlerFunc)}
+	api := NewTestAPI(ext)
+
+	var calls int
+	api.OnLLMUsage(func(e LLMUsageEvent, c Context) {
+		calls++
+	})
+
+	if len(ext.Handlers[LLMUsage]) != 1 {
+		t.Fatalf("expected 1 LLMUsage handler registered, got %d", len(ext.Handlers[LLMUsage]))
+	}
+
+	r := makeRunner(*ext)
+	_, _ = r.Emit(LLMUsageEvent{InputTokens: 1})
+	if calls != 1 {
+		t.Errorf("expected handler called once, got %d", calls)
+	}
+}
+
+func TestAgentEndEvent_EnrichedFields(t *testing.T) {
+	// Verify the enriched event carries through Emit without mangling.
+	var got AgentEndEvent
+	ext := makeHandlerExt("end.go", map[EventType][]HandlerFunc{
+		AgentEnd: {
+			func(e Event, c Context) Result {
+				got = e.(AgentEndEvent)
+				return nil
+			},
+		},
+	})
+	r := makeRunner(ext)
+	_, err := r.Emit(AgentEndEvent{
+		Response:              "done",
+		StopReason:            "completed",
+		ToolCallCount:         3,
+		ToolNames:             []string{"bash", "read", "bash"},
+		LLMCallCount:          4,
+		InputTokensDelta:      1500,
+		OutputTokensDelta:     400,
+		CacheReadTokensDelta:  200,
+		CacheWriteTokensDelta: 100,
+		CostDelta:             0.0123,
+		DurationMs:            2500,
+	})
+	if err != nil {
+		t.Fatalf("emit: %v", err)
+	}
+	if got.ToolCallCount != 3 {
+		t.Errorf("ToolCallCount: got %d want 3", got.ToolCallCount)
+	}
+	if len(got.ToolNames) != 3 || got.ToolNames[0] != "bash" || got.ToolNames[2] != "bash" {
+		t.Errorf("ToolNames: %v", got.ToolNames)
+	}
+	if got.LLMCallCount != 4 {
+		t.Errorf("LLMCallCount: got %d want 4", got.LLMCallCount)
+	}
+	if got.InputTokensDelta != 1500 || got.OutputTokensDelta != 400 {
+		t.Errorf("token deltas: %+v", got)
+	}
+	if got.CacheReadTokensDelta != 200 || got.CacheWriteTokensDelta != 100 {
+		t.Errorf("cache deltas: %+v", got)
+	}
+	if got.CostDelta != 0.0123 {
+		t.Errorf("CostDelta: got %v", got.CostDelta)
+	}
+	if got.DurationMs != 2500 {
+		t.Errorf("DurationMs: got %d", got.DurationMs)
+	}
+}
@@ -34,15 +34,10 @@ func LoadExtensions(extraPaths []string) ([]LoadedExtension, error) {
 	for _, p := range paths {
 		ext, err := loadSingleExtension(p)
 		if err != nil {
-			log.Warn("skipping extension", "path", p, "err", err)
 			continue
 		}
 		loaded = append(loaded, *ext)
-		log.Debug("loaded extension", "path", p,
-			"handlers", countHandlers(ext),
-			"tools", len(ext.Tools),
-			"commands", len(ext.Commands),
-			"tool_renderers", len(ext.ToolRenderers))
+		log.Debug("loaded extension", "path", p, "handlers", countHandlers(ext), "tools", len(ext.Tools), "commands", len(ext.Commands), "tool_renderers", len(ext.ToolRenderers))
 	}
 	return loaded, nil
 }
@@ -377,8 +372,12 @@ func loadSingleExtension(path string) (*LoadedExtension, error) {
 		Handlers: make(map[EventType][]HandlerFunc),
 	}

-	// Create a fresh interpreter.
-	i := interp.New(interp.Options{})
+	// Create a fresh interpreter. Yaegi runs extensions in restricted mode,
+	// where os.Getenv/os.LookupEnv/os.Environ read from a virtualized
+	// environment rather than the real one. Seed it with the process
+	// environment so extensions can read variables (e.g. CI-provided ones
+	// like GITHUB_EVENT_PATH) without being able to mutate the host's env.
+	i := interp.New(interp.Options{Env: os.Environ()})

 	// Expose the Go stdlib. The base set covers most packages; the
 	// unrestricted set adds os/exec so extensions can spawn processes.
@@ -434,6 +433,24 @@ func loadSingleExtension(path string) (*LoadedExtension, error) {
 				return *r
 			})
 		},
+		onToolCallInputStart: func(h func(ToolCallInputStartEvent, Context)) {
+			reg(ToolCallInputStart, func(e Event, c Context) Result {
+				h(e.(ToolCallInputStartEvent), c)
+				return nil
+			})
+		},
+		onToolCallInputDelta: func(h func(ToolCallInputDeltaEvent, Context)) {
+			reg(ToolCallInputDelta, func(e Event, c Context) Result {
+				h(e.(ToolCallInputDeltaEvent), c)
+				return nil
+			})
+		},
+		onToolCallInputEnd: func(h func(ToolCallInputEndEvent, Context)) {
+			reg(ToolCallInputEnd, func(e Event, c Context) Result {
+				h(e.(ToolCallInputEndEvent), c)
+				return nil
+			})
+		},
 		onToolExecStart: func(h func(ToolExecutionStartEvent, Context)) {
 			reg(ToolExecutionStart, func(e Event, c Context) Result {
 				h(e.(ToolExecutionStartEvent), c)
@@ -605,6 +622,63 @@ func loadSingleExtension(path string) (*LoadedExtension, error) {
 				return nil
 			})
 		},
+		onStepStart: func(h func(StepStartEvent, Context)) {
+			reg(StepStart, func(e Event, c Context) Result {
+				h(e.(StepStartEvent), c)
+				return nil
+			})
+		},
+		onStepFinish: func(h func(StepFinishEvent, Context)) {
+			reg(StepFinish, func(e Event, c Context) Result {
+				h(e.(StepFinishEvent), c)
+				return nil
+			})
+		},
+		onReasoningStart: func(h func(ReasoningStartEvent, Context)) {
+			reg(ReasoningStart, func(e Event, c Context) Result {
+				h(e.(ReasoningStartEvent), c)
+				return nil
+			})
+		},
+		onWarnings: func(h func(WarningsEvent, Context)) {
+			reg(Warnings, func(e Event, c Context) Result {
+				h(e.(WarningsEvent), c)
+				return nil
+			})
+		},
+		onSource: func(h func(SourceEvent, Context)) {
+			reg(Source, func(e Event, c Context) Result {
+				h(e.(SourceEvent), c)
+				return nil
+			})
+		},
+		onError: func(h func(ErrorEvent, Context)) {
+			reg(Error, func(e Event, c Context) Result {
+				h(e.(ErrorEvent), c)
+				return nil
+			})
+		},
+		onRetry: func(h func(RetryEvent, Context)) {
+			reg(Retry, func(e Event, c Context) Result {
+				h(e.(RetryEvent), c)
+				return nil
+			})
+		},
+		onPrepareStep: func(h func(PrepareStepEvent, Context) *PrepareStepResult) {
+			reg(PrepareStep, func(e Event, c Context) Result {
+				r := h(e.(PrepareStepEvent), c)
+				if r == nil {
+					return nil
+				}
+				return *r
+			})
+		},
+		onLLMUsage: func(h func(LLMUsageEvent, Context)) {
+			reg(LLMUsage, func(e Event, c Context) Result {
+				h(e.(LLMUsageEvent), c)
+				return nil
+			})
+		},
 	}

 	// Call Init — the extension registers its handlers, tools, commands.
@@ -72,30 +72,6 @@ func loadManifestFromPath(path string) (*Manifest, error) {
 	return &manifest, nil
 }

-// saveManifestToScope saves the manifest to the given scope.
-func saveManifestToScope(manifest *Manifest, scope InstallScope) error {
-	path := manifestPathForScope(scope)
-	return saveManifestToPath(manifest, path)
-}
-
-// saveManifestToPath saves a manifest to a specific file path.
-func saveManifestToPath(manifest *Manifest, path string) error {
-	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
-		return fmt.Errorf("creating manifest directory: %w", err)
-	}
-
-	data, err := json.MarshalIndent(manifest, "", "  ")
-	if err != nil {
-		return fmt.Errorf("encoding manifest: %w", err)
-	}
-
-	if err := os.WriteFile(path, data, 0644); err != nil {
-		return fmt.Errorf("writing manifest: %w", err)
-	}
-
-	return nil
-}
-
 // manifestPathForScope returns the manifest file path for a scope.
 func manifestPathForScope(scope InstallScope) string {
 	if scope == ScopeProject {
@@ -113,55 +89,6 @@ func manifestPathForScope(scope InstallScope) string {
 	return filepath.Join(base, "kit", "git", "packages.json")
 }

-// GetGlobalManifest returns the global manifest.
-func GetGlobalManifest() (*Manifest, error) {
-	return loadManifestFromScope(ScopeGlobal)
-}
-
-// GetProjectManifest returns the project manifest.
-func GetProjectManifest() (*Manifest, error) {
-	return loadManifestFromScope(ScopeProject)
-}
-
-// addEntryToManifest adds or replaces an entry in the manifest for a scope.
-func addEntryToManifest(entry ManifestEntry, scope InstallScope) error {
-	manifest, err := loadManifestFromScope(scope)
-	if err != nil {
-		return err
-	}
-
-	// Remove any existing entry with same identity
-	identity := entry.Identity()
-	filtered := make([]ManifestEntry, 0, len(manifest.Packages))
-	for _, p := range manifest.Packages {
-		if p.Identity() != identity {
-			filtered = append(filtered, p)
-		}
-	}
-	filtered = append(filtered, entry)
-	manifest.Packages = filtered
-
-	return saveManifestToScope(manifest, scope)
-}
-
-// removeEntryFromManifest removes an entry by identity from the manifest for a scope.
-func removeEntryFromManifest(identity string, scope InstallScope) error {
-	manifest, err := loadManifestFromScope(scope)
-	if err != nil {
-		return err
-	}
-
-	filtered := make([]ManifestEntry, 0, len(manifest.Packages))
-	for _, p := range manifest.Packages {
-		if p.Identity() != identity {
-			filtered = append(filtered, p)
-		}
-	}
-	manifest.Packages = filtered
-
-	return saveManifestToScope(manifest, scope)
-}
-
 // FindInManifest finds an entry by identity in either global or project manifest.
 // Returns the entry and its scope, or nil if not found.
 func FindInManifest(identity string) (*ManifestEntry, InstallScope, error) {
@@ -1,21 +1,96 @@
 package extensions

 import (
+	"bytes"
+	"encoding/json"
 	"fmt"
+	"log"
+	"maps"
 	"os"
+	"path/filepath"
+	"runtime"
 	"sort"
+	"strconv"
 	"strings"
 	"sync"

-	"github.com/charmbracelet/log"
 	"github.com/spf13/viper"
 )

+// ---------------------------------------------------------------------------
+// reentrantMu — a per-extension mutex that allows the same goroutine to
+// re-enter (e.g. handler → ctx.EmitCustomEvent → handler in same extension).
+// Different goroutines are serialized, preventing concurrent state mutation.
+// ---------------------------------------------------------------------------
+
+type reentrantMu struct {
+	mu    sync.Mutex
+	cond  *sync.Cond
+	owner int64 // goroutine ID that holds the lock, or 0
+	depth int   // re-entrancy depth
+}
+
+// init wires cond to mu so lock/unlock can wait and signal. Must be called
+// after the struct is at its final memory location (not before copying).
+func (r *reentrantMu) init() {
+	r.cond = sync.NewCond(&r.mu)
+}
+
+// lock acquires the mutex. If the calling goroutine already holds it, the
+// call succeeds immediately (re-entrant). Every call to lock must be paired
+// with a call to unlock.
+func (r *reentrantMu) lock() {
+	gid := goroutineID()
+	r.mu.Lock()
+	if r.owner == gid {
+		// Re-entrant: same goroutine already holds the lock.
+		r.depth++
+		r.mu.Unlock()
+		return
+	}
+	// Wait for the current owner to release.
+	for r.owner != 0 {
+		r.cond.Wait() // releases mu, blocks, re-acquires mu on wake
+	}
+	r.owner = gid
+	r.depth = 1
+	r.mu.Unlock()
+}
+
+// unlock releases the mutex (or decrements re-entrancy depth).
+func (r *reentrantMu) unlock() {
+	r.mu.Lock()
+	r.depth--
+	if r.depth == 0 {
+		r.owner = 0
+		r.cond.Signal()
+	}
+	r.mu.Unlock()
+}
+
+// goroutineID extracts the current goroutine's ID from runtime.Stack output.
+// A parse failure is ignored and yields 0, the value reentrantMu uses for "no owner".
+func goroutineID() int64 {
+	var buf [64]byte
+	n := runtime.Stack(buf[:], false)
+	// Stack output starts with "goroutine NNN ["; keep only the digits before the next space.
+	s := buf[:n]
+	s = s[len("goroutine "):]
+	s = s[:bytes.IndexByte(s, ' ')]
+	id, _ := strconv.ParseInt(string(s), 10, 64)
+	return id
+}
+
 // Runner manages loaded extensions and dispatches events to their handlers
 // sequentially. Handlers execute in extension
 // load order; for cancellable events the first blocking result wins.
+//
+// Each extension has a dedicated reentrant mutex so that handlers for the
+// same extension are serialized (preventing data races on shared package-level
+// state), while handlers for different extensions may execute concurrently.
 type Runner struct {
 	extensions      []LoadedExtension
+	extMu           []reentrantMu // per-extension reentrant mutex, indexed by extension position
 	ctx             Context
 	widgets         map[string]WidgetConfig   // keyed by widget ID
 	statusEntries   map[string]StatusBarEntry // keyed by status key
@@ -26,9 +101,24 @@ type Runner struct {
 	disabledTools   map[string]bool           // nil = all tools enabled
 	customEventSubs map[string][]func(string) // inter-extension event bus
 	optionOverrides map[string]string         // runtime option overrides
+	configStore     *viper.Viper              // per-instance config store (nil = global)
+	state           map[string]string         // session-scoped extension state (last-write-wins)
+	stateMu         sync.RWMutex              // guards state independently of mu
+	saverMu         sync.Mutex                // serializes stateSaver invocations so atomic-rename writes don't interleave
+	stateSaver      func()                    // optional persistence hook invoked after each state mutation
 	mu              sync.RWMutex
 }

+// SetConfigStore sets the per-instance configuration store used by GetOption
+// to resolve "options.<name>" config values. When unset (nil), GetOption falls
+// back to the process-global viper store. Threading a per-Kit store keeps
+// extension option resolution isolated between Kit instances.
+func (r *Runner) SetConfigStore(v *viper.Viper) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.configStore = v
+}
+
 // ShortcutEntry pairs a shortcut definition with its handler.
 type ShortcutEntry struct {
 	Def     ShortcutDef
@@ -52,7 +142,11 @@ type LoadedExtension struct {

 // NewRunner creates a Runner from a set of loaded extensions.
 func NewRunner(exts []LoadedExtension) *Runner {
-	return &Runner{extensions: exts}
+	mus := make([]reentrantMu, len(exts))
+	for i := range mus {
+		mus[i].init()
+	}
+	return &Runner{extensions: exts, extMu: mus}
 }

 // SetContext updates the runtime context (session ID, model, etc.) that is
@@ -98,6 +192,9 @@ func normalizeContext(ctx Context) Context {
 	if ctx.SendMultimodalMessage == nil {
 		ctx.SendMultimodalMessage = func(string, []FilePart) {}
 	}
+	if ctx.NewSession == nil {
+		ctx.NewSession = func(string) error { return fmt.Errorf("new session not available") }
+	}
 	if ctx.GetSessionUsage == nil {
 		ctx.GetSessionUsage = func() SessionUsage { return SessionUsage{} }
 	}
@@ -177,6 +274,18 @@ func normalizeContext(ctx Context) Context {
 	if ctx.GetEntries == nil {
 		ctx.GetEntries = func(string) []ExtensionEntry { return nil }
 	}
+	if ctx.SetState == nil {
+		ctx.SetState = func(string, string) {}
+	}
+	if ctx.GetState == nil {
+		ctx.GetState = func(string) (string, bool) { return "", false }
+	}
+	if ctx.DeleteState == nil {
+		ctx.DeleteState = func(string) {}
+	}
+	if ctx.ListState == nil {
+		ctx.ListState = func() []string { return nil }
+	}
 	if ctx.GetOption == nil {
 		ctx.GetOption = func(string) string { return "" }
 	}
@@ -367,13 +476,15 @@ func (r *Runner) Emit(event Event) (Result, error) {
 	for i := range r.extensions {
 		ext := &r.extensions[i]
 		handlers := ext.Handlers[event.Type()]
+		if len(handlers) == 0 {
+			continue
+		}
+
+		r.extMu[i].lock()
 		for _, handler := range handlers {
 			result, err := safeCall(handler, event, ctx)
 			if err != nil {
-				log.Warn("extension handler error",
-					"path", ext.Path,
-					"event", event.Type(),
-					"err", err)
+				log.Printf("WARN extension handler error: path=%s event=%s err=%v", ext.Path, event.Type(), err)
 				continue
 			}
 			if result == nil {
@@ -382,6 +493,7 @@ func (r *Runner) Emit(event Event) (Result, error) {

 			// Check for blocking/short-circuit results.
 			if isBlocking(result) {
+				r.extMu[i].unlock()
 				return result, nil
 			}

@@ -389,6 +501,7 @@ func (r *Runner) Emit(event Event) (Result, error) {
 			// the caller is responsible for applying the modifications.
 			accumulated = result
 		}
+		r.extMu[i].unlock()
 	}
 	return accumulated, nil
 }
@@ -654,6 +767,168 @@ func (r *Runner) GetMessageRenderer(name string) *MessageRendererConfig {
 	return nil
 }

+// ---------------------------------------------------------------------------
+// Extension state store (session-scoped, last-write-wins)
+// ---------------------------------------------------------------------------
+
+// SetState records a key-value pair in the runner's session-scoped extension
+// state store. The store is in-memory; callers wire SetStateSaver to persist
+// changes to a sidecar file. Thread-safe.
+//
+// When a saver is installed, concurrent SetState/DeleteState invocations are
+// serialized through saverMu so that overlapping snapshot-and-rename writes
+// cannot interleave (which would otherwise race on the shared tmp file and
+// risk persisting an older snapshot after a newer one).
+func (r *Runner) SetState(key, value string) {
+	r.stateMu.Lock()
+	if r.state == nil {
+		r.state = make(map[string]string)
+	}
+	r.state[key] = value
+	saver := r.stateSaver
+	r.stateMu.Unlock()
+	r.runSaver(saver)
+}
+
+// GetState returns the value previously stored via SetState, plus a bool
+// indicating whether the key was present. Thread-safe.
+func (r *Runner) GetState(key string) (string, bool) {
+	r.stateMu.RLock()
+	defer r.stateMu.RUnlock()
+	v, ok := r.state[key]
+	return v, ok
+}
+
+// DeleteState removes a key from the state store. No-op if the key is
+// missing. Thread-safe. Saver invocations are serialized via saverMu — see
+// SetState for the rationale.
+func (r *Runner) DeleteState(key string) {
+	r.stateMu.Lock()
+	_, existed := r.state[key]
+	if existed {
+		delete(r.state, key)
+	}
+	saver := r.stateSaver
+	r.stateMu.Unlock()
+	if !existed {
+		return
+	}
+	r.runSaver(saver)
+}
+
+// runSaver invokes the optional persistence callback under saverMu so
+// concurrent SetState/DeleteState writers cannot race on the shared tmp
+// file used by SaveStateToFile's atomic rename. The deferred Unlock
+// guarantees saverMu is released even if the saver panics.
+func (r *Runner) runSaver(saver func()) {
+	if saver == nil {
+		return
+	}
+	r.saverMu.Lock()
+	defer r.saverMu.Unlock()
+	saver()
+}
+
+// ListState returns all keys currently in the state store, in unspecified
+// order, or nil when the store is empty. Thread-safe.
+func (r *Runner) ListState() []string {
+	r.stateMu.RLock()
+	defer r.stateMu.RUnlock()
+	if len(r.state) == 0 {
+		return nil
+	}
+	keys := make([]string, 0, len(r.state))
+	for k := range r.state {
+		keys = append(keys, k)
+	}
+	return keys
+}
+
+// SetStateSaver installs an optional persistence hook that SetState and
+// DeleteState invoke after mutating the store; LoadStateFromFile does not call it.
+// Pass nil to disable persistence. Thread-safe.
+func (r *Runner) SetStateSaver(saver func()) {
+	r.stateMu.Lock()
+	defer r.stateMu.Unlock()
+	r.stateSaver = saver
+}
+
+// SnapshotState returns a copy of the current state store as a
+// fresh map, or nil when the store is empty. Useful for persisting to disk
+// without holding the lock. Thread-safe.
+func (r *Runner) SnapshotState() map[string]string {
+	r.stateMu.RLock()
+	defer r.stateMu.RUnlock()
+	if len(r.state) == 0 {
+		return nil
+	}
+	copyMap := make(map[string]string, len(r.state))
+	maps.Copy(copyMap, r.state)
+	return copyMap
+}
+
+// LoadStateFromFile reads a JSON map from path and replaces the in-memory
+// state store with its contents. Missing or empty files are treated as
+// "no prior state": the in-memory store is replaced with an empty map so
+// callers can safely switch sessions without leaking keys from a prior
+// session into a new one. Malformed JSON returns the parse error without
+// touching the existing store. Thread-safe.
+func (r *Runner) LoadStateFromFile(path string) error {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			r.stateMu.Lock()
+			r.state = map[string]string{}
+			r.stateMu.Unlock()
+			return nil
+		}
+		return fmt.Errorf("reading extension state: %w", err)
+	}
+	if len(data) == 0 {
+		r.stateMu.Lock()
+		r.state = map[string]string{}
+		r.stateMu.Unlock()
+		return nil
+	}
+	var loaded map[string]string
+	if err := json.Unmarshal(data, &loaded); err != nil {
+		return fmt.Errorf("parsing extension state: %w", err)
+	}
+	r.stateMu.Lock()
+	r.state = loaded
+	r.stateMu.Unlock()
+	return nil
+}
+
+// SaveStateToFile writes the current state store to path as indented JSON,
+// creating parent directories as needed. An empty store writes an empty
+// object so that consumers can distinguish "loaded but empty" from "never
+// saved".
+//
+// The data is first written to path+".tmp" and then renamed over path. The
+// tmp file is removed if either the write or the rename fails, so a failed
+// save does not leave a partial file behind. Reading the store is
+// lock-protected, but concurrent calls targeting the same path share the
+// tmp file and must be serialized by the caller (runSaver does this for the
+// installed saver).
+//
+// Returns a wrapped error naming the step that failed, or nil on success.
+func (r *Runner) SaveStateToFile(path string) error {
+	snap := r.SnapshotState()
+	if snap == nil {
+		// SnapshotState reports an empty store as nil; persist "{}" rather than "null".
+		snap = map[string]string{}
+	}
+	data, err := json.MarshalIndent(snap, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshalling extension state: %w", err)
+	}
+	if dir := filepath.Dir(path); dir != "." && dir != "" {
+		if err := os.MkdirAll(dir, 0o755); err != nil {
+			return fmt.Errorf("creating state directory: %w", err)
+		}
+	}
+	tmp := path + ".tmp"
+	if err := os.WriteFile(tmp, data, 0o644); err != nil {
+		// WriteFile may have created or truncated tmp before failing; remove
+		// it so a partial file does not linger (mirrors the rename path).
+		_ = os.Remove(tmp)
+		return fmt.Errorf("writing extension state: %w", err)
+	}
+	if err := os.Rename(tmp, path); err != nil {
+		_ = os.Remove(tmp)
+		return fmt.Errorf("renaming extension state: %w", err)
+	}
+	return nil
+}
+
 // ---------------------------------------------------------------------------
 // Hot-reload
 // ---------------------------------------------------------------------------
@@ -677,7 +952,9 @@ func (r *Runner) Reload(exts []LoadedExtension) {
 	r.uiVisibility = nil
 	r.disabledTools = nil
 	r.customEventSubs = nil
-	// optionOverrides are intentionally preserved.
+	// optionOverrides and state are intentionally preserved across reloads:
+	// they represent user/session intent (not extension code) and would be
+	// surprising to lose on a hot-reload.
 }

 // ---------------------------------------------------------------------------
@@ -707,9 +984,7 @@ func (r *Runner) EmitCustomEvent(name, data string) {
 	safeInvoke := func(h func(string)) {
 		defer func() {
 			if rec := recover(); rec != nil {
-				log.Warn("custom event handler panicked",
-					"event", name,
-					"err", fmt.Sprintf("%v", rec))
+				log.Printf("WARN custom event handler panicked: event=%s err=%v", name, rec)
 			}
 		}()
 		h(data)
@@ -717,11 +992,17 @@ func (r *Runner) EmitCustomEvent(name, data string) {

 	// Extension-registered handlers first (in load order).
 	for i := range r.extensions {
-		for _, h := range r.extensions[i].CustomEventHandlers[name] {
+		extHandlers := r.extensions[i].CustomEventHandlers[name]
+		if len(extHandlers) == 0 {
+			continue
+		}
+		r.extMu[i].lock()
+		for _, h := range extHandlers {
 			safeInvoke(h)
 		}
+		r.extMu[i].unlock()
 	}
-	// Then dynamic subscriptions.
+	// Then dynamic subscriptions (not extension-scoped, no per-ext lock).
 	for _, h := range dynamicHandlers {
 		safeInvoke(h)
 	}
@@ -788,7 +1069,13 @@ func (r *Runner) GetOption(name string) string {

 	// 3. Viper config: options.<name>
 	configKey := "options." + name
-	if v := viper.GetString(configKey); v != "" {
+	r.mu.RLock()
+	store := r.configStore
+	r.mu.RUnlock()
+	if store == nil {
+		store = viper.GetViper()
+	}
+	if v := store.GetString(configKey); v != "" {
 		return v
 	}

@@ -1,6 +1,7 @@
 package extensions

 import (
+	"sync"
 	"testing"
 )

@@ -571,3 +572,142 @@ func TestRunner_ContextPrintNilSafe(t *testing.T) {
 		t.Fatalf("unexpected error: %v", err)
 	}
 }
+
+func TestRunner_ConcurrentEmitSameExtension(t *testing.T) {
+	// Verify that concurrent Emit calls for the same extension are serialized
+	// and don't cause data races on shared handler state.
+	var counter int
+	ext := makeHandlerExt("shared-state.go", map[EventType][]HandlerFunc{
+		SubagentStart: {
+			func(e Event, c Context) Result {
+				// Read-modify-write: racy without serialization.
+				v := counter
+				counter = v + 1
+				return nil
+			},
+		},
+		SubagentChunk: {
+			func(e Event, c Context) Result {
+				v := counter
+				counter = v + 1
+				return nil
+			},
+		},
+	})
+
+	r := makeRunner(ext)
+	var wg sync.WaitGroup
+	const goroutines = 20
+	const iterations = 50
+	wg.Add(goroutines)
+	for range goroutines {
+		go func() {
+			defer wg.Done()
+			for range iterations {
+				_, _ = r.Emit(SubagentStartEvent{ToolCallID: "x"})
+				_, _ = r.Emit(SubagentChunkEvent{ToolCallID: "x"})
+			}
+		}()
+	}
+	wg.Wait()
+	if counter != goroutines*iterations*2 {
+		t.Errorf("expected counter=%d, got %d (race detected)", goroutines*iterations*2, counter)
+	}
+}
+
+func TestRunner_ConcurrentEmitDifferentExtensions(t *testing.T) {
+	// Two extensions with independent state should not block each other
+	// and should both run correctly under concurrent Emit calls.
+	var counter1, counter2 int
+	ext1 := makeHandlerExt("ext1.go", map[EventType][]HandlerFunc{
+		SubagentStart: {
+			func(e Event, c Context) Result {
+				v := counter1
+				counter1 = v + 1
+				return nil
+			},
+		},
+	})
+	ext2 := makeHandlerExt("ext2.go", map[EventType][]HandlerFunc{
+		SubagentStart: {
+			func(e Event, c Context) Result {
+				v := counter2
+				counter2 = v + 1
+				return nil
+			},
+		},
+	})
+
+	r := makeRunner(ext1, ext2)
+	var wg sync.WaitGroup
+	const goroutines = 20
+	const iterations = 50
+	wg.Add(goroutines)
+	for range goroutines {
+		go func() {
+			defer wg.Done()
+			for range iterations {
+				_, _ = r.Emit(SubagentStartEvent{ToolCallID: "x"})
+			}
+		}()
+	}
+	wg.Wait()
+	expected := goroutines * iterations
+	if counter1 != expected {
+		t.Errorf("ext1 counter: expected %d, got %d", expected, counter1)
+	}
+	if counter2 != expected {
+		t.Errorf("ext2 counter: expected %d, got %d", expected, counter2)
+	}
+}
+
+func TestRunner_ReentrantEmitCustomEvent(t *testing.T) {
+	// Verify that a handler can call EmitCustomEvent (which dispatches to
+	// the same extension's custom event handlers) without deadlocking.
+	var order []string
+	ext := LoadedExtension{
+		Path: "reentrant.go",
+		Handlers: map[EventType][]HandlerFunc{
+			SessionStart: {
+				func(e Event, c Context) Result {
+					order = append(order, "session_start")
+					// Placeholder: it cannot reference the runner yet, so it is
+					// replaced below by a handler that calls r.EmitCustomEvent.
+					return nil
+				},
+			},
+		},
+		CustomEventHandlers: map[string][]func(string){
+			"test-event": {
+				func(data string) {
+					order = append(order, "custom:"+data)
+				},
+			},
+		},
+	}
+
+	r := makeRunner(ext)
+
+	// Wire up the handler to call EmitCustomEvent re-entrantly.
+	ext.Handlers[SessionStart] = []HandlerFunc{
+		func(e Event, c Context) Result {
+			order = append(order, "session_start")
+			r.EmitCustomEvent("test-event", "hello")
+			return nil
+		},
+	}
+	r.extensions[0] = ext
+	// Rebuild mutexes after modifying extensions slice.
+	r.extMu = make([]reentrantMu, len(r.extensions))
+	for i := range r.extMu {
+		r.extMu[i].init()
+	}
+
+	_, err := r.Emit(SessionStartEvent{})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(order) != 2 || order[0] != "session_start" || order[1] != "custom:hello" {
+		t.Errorf("expected [session_start, custom:hello], got %v", order)
+	}
+}
@@ -0,0 +1,262 @@
+package extensions
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"sync"
+	"testing"
+	"time"
+)
+
+func TestRunner_State_BasicSetGetDelete(t *testing.T) {
+	r := NewRunner(nil)
+
+	if _, ok := r.GetState("missing"); ok {
+		t.Fatal("expected GetState to return ok=false for missing key")
+	}
+
+	r.SetState("a", "1")
+	r.SetState("b", "2")
+	r.SetState("a", "3") // last-write-wins
+
+	if v, ok := r.GetState("a"); !ok || v != "3" {
+		t.Errorf("expected GetState(a)=(3,true), got (%q,%v)", v, ok)
+	}
+	if v, ok := r.GetState("b"); !ok || v != "2" {
+		t.Errorf("expected GetState(b)=(2,true), got (%q,%v)", v, ok)
+	}
+
+	keys := r.ListState()
+	if len(keys) != 2 {
+		t.Errorf("expected 2 keys, got %d (%v)", len(keys), keys)
+	}
+
+	r.DeleteState("a")
+	if _, ok := r.GetState("a"); ok {
+		t.Error("expected key a to be gone after DeleteState")
+	}
+	if len(r.ListState()) != 1 {
+		t.Errorf("expected 1 key after delete, got %v", r.ListState())
+	}
+
+	// Deleting missing key is a no-op.
+	r.DeleteState("never-there")
+}
+
+func TestRunner_State_SaverFires(t *testing.T) {
+	r := NewRunner(nil)
+	var calls int
+	var mu sync.Mutex
+	r.SetStateSaver(func() {
+		mu.Lock()
+		calls++
+		mu.Unlock()
+	})
+
+	r.SetState("a", "1")
+	r.SetState("a", "2")
+	r.DeleteState("a")
+	r.DeleteState("a") // missing → no save
+
+	mu.Lock()
+	defer mu.Unlock()
+	if calls != 3 {
+		t.Errorf("expected saver to fire 3 times (2 sets + 1 delete), got %d", calls)
+	}
+}
+
+func TestRunner_State_SaveAndLoadRoundTrip(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "ext-state.json")
+
+	r1 := NewRunner(nil)
+	r1.SetState("k1", "v1")
+	r1.SetState("k2", `{"json":"value"}`)
+	if err := r1.SaveStateToFile(path); err != nil {
+		t.Fatalf("SaveStateToFile: %v", err)
+	}
+
+	// Verify file contains JSON map.
+	data, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("reading saved file: %v", err)
+	}
+	var parsed map[string]string
+	if err := json.Unmarshal(data, &parsed); err != nil {
+		t.Fatalf("unmarshalling: %v", err)
+	}
+	if parsed["k1"] != "v1" || parsed["k2"] != `{"json":"value"}` {
+		t.Errorf("unexpected file contents: %v", parsed)
+	}
+
+	r2 := NewRunner(nil)
+	if err := r2.LoadStateFromFile(path); err != nil {
+		t.Fatalf("LoadStateFromFile: %v", err)
+	}
+	if v, ok := r2.GetState("k1"); !ok || v != "v1" {
+		t.Errorf("expected k1=v1 after load, got (%q,%v)", v, ok)
+	}
+	if v, ok := r2.GetState("k2"); !ok || v != `{"json":"value"}` {
+		t.Errorf("expected k2 to round-trip, got %q", v)
+	}
+}
+
+func TestRunner_State_LoadMissingFileClearsState(t *testing.T) {
+	// LoadStateFromFile is documented to "replace the in-memory state store
+	// with its contents"; for a missing file that means clearing the store.
+	// This is what makes session-switching safe: a new session that has not
+	// yet written a sidecar must not inherit keys from a prior session.
+	r := NewRunner(nil)
+	r.SetState("a", "1")
+	if err := r.LoadStateFromFile(filepath.Join(t.TempDir(), "does-not-exist.json")); err != nil {
+		t.Errorf("expected nil error for missing file, got %v", err)
+	}
+	if _, ok := r.GetState("a"); ok {
+		t.Error("expected pre-existing state to be cleared when target file is missing")
+	}
+	if keys := r.ListState(); keys != nil {
+		t.Errorf("expected ListState() to be nil after clearing, got %v", keys)
+	}
+}
+
+func TestRunner_State_LoadEmptyFileClearsState(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "empty.json")
+	if err := os.WriteFile(path, nil, 0o644); err != nil {
+		t.Fatal(err)
+	}
+	r := NewRunner(nil)
+	r.SetState("a", "1")
+	if err := r.LoadStateFromFile(path); err != nil {
+		t.Errorf("expected nil error for empty file, got %v", err)
+	}
+	if _, ok := r.GetState("a"); ok {
+		t.Error("expected pre-existing state to be cleared when target file is empty")
+	}
+}
+
+func TestRunner_State_LoadMalformedFileError(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "bad.json")
+	if err := os.WriteFile(path, []byte("{not json"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	r := NewRunner(nil)
+	if err := r.LoadStateFromFile(path); err == nil {
+		t.Error("expected error loading malformed JSON, got nil")
+	}
+}
+
+func TestRunner_State_PersistenceViaSaver(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "ext-state.json")
+
+	r := NewRunner(nil)
+	r.SetStateSaver(func() {
+		_ = r.SaveStateToFile(path)
+	})
+	r.SetState("hello", "world")
+
+	// File should exist with the value already.
+	data, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("reading saved file: %v", err)
+	}
+	var parsed map[string]string
+	if err := json.Unmarshal(data, &parsed); err != nil {
+		t.Fatalf("unmarshalling: %v", err)
+	}
+	if parsed["hello"] != "world" {
+		t.Errorf("expected file to contain hello=world, got %v", parsed)
+	}
+}
+
+func TestRunner_State_ConcurrentSet(t *testing.T) {
+	r := NewRunner(nil)
+	var wg sync.WaitGroup
+	const goroutines = 16
+	const iterations = 100
+	wg.Add(goroutines)
+	for range goroutines {
+		go func() {
+			defer wg.Done()
+			for range iterations {
+				r.SetState("k", "v")
+				_, _ = r.GetState("k")
+			}
+		}()
+	}
+	wg.Wait()
+	if v, ok := r.GetState("k"); !ok || v != "v" {
+		t.Errorf("expected k=v after concurrent writes, got (%q,%v)", v, ok)
+	}
+}
+
+func TestRunner_State_ContextNoOpsWhenUnset(t *testing.T) {
+	// Verify normalizeContext installs safe no-ops for SetState/GetState/etc.
+	// when not provided by the caller.
+	ext := makeHandlerExt("state.go", map[EventType][]HandlerFunc{
+		SessionStart: {
+			func(e Event, c Context) Result {
+				// All four state functions should be non-nil and safe to call.
+				c.SetState("a", "b")
+				if v, ok := c.GetState("a"); ok || v != "" {
+					t.Errorf("no-op GetState should return (\"\", false); got (%q,%v)", v, ok)
+				}
+				c.DeleteState("a")
+				if keys := c.ListState(); keys != nil {
+					t.Errorf("no-op ListState should return nil; got %v", keys)
+				}
+				return nil
+			},
+		},
+	})
+	r := makeRunner(ext)
+	// SetContext with empty Context to exercise normalizeContext defaults.
+	r.SetContext(Context{})
+	_, err := r.Emit(SessionStartEvent{})
+	if err != nil {
+		t.Fatalf("emit: %v", err)
+	}
+}
+
+func TestRunner_State_SaverPanicReleasesSaverMu(t *testing.T) {
+	// If the saver callback panics (e.g. disk full mid-write), runSaver
+	// must still release saverMu so subsequent SetState/DeleteState calls
+	// can make progress. Without `defer Unlock()` the lock would be
+	// permanently held and the next write would deadlock.
+	r := NewRunner(nil)
+	var calls int
+	r.SetStateSaver(func() {
+		calls++
+		if calls == 1 {
+			panic("simulated disk-write failure")
+		}
+	})
+
+	// First call panics. Recover, then verify a follow-up call still works
+	// without blocking (proving saverMu was released).
+	func() {
+		defer func() {
+			if rec := recover(); rec == nil {
+				t.Fatal("expected panic from first saver invocation")
+			}
+		}()
+		r.SetState("a", "1")
+	}()
+
+	done := make(chan struct{})
+	go func() {
+		r.SetState("b", "2") // would deadlock if saverMu were still held
+		close(done)
+	}()
+	select {
+	case <-done:
+	case <-time.After(2 * time.Second):
+		t.Fatal("SetState after saver panic blocked — saverMu was not released")
+	}
+	if calls != 2 {
+		t.Errorf("expected saver to fire twice (panic + recovery write), got %d", calls)
+	}
+}
@@ -2,22 +2,15 @@
 package extensions

 import (
-	"bufio"
-	"context"
-	"encoding/json"
 	"fmt"
 	"os"
-	"os/exec"
-	"strings"
 	"sync"
-	"sync/atomic"
 	"time"
 )

 // ---------------------------------------------------------------------------
 // Subagent types
 // ---------------------------------------------------------------------------
-
 // SubagentConfig configures a subagent spawn.
 type SubagentConfig struct {
 	// Prompt is the task/instruction for the subagent (required).
@@ -157,221 +150,3 @@ func (h *SubagentHandle) Wait() SubagentResult {
 func (h *SubagentHandle) Done() <-chan struct{} {
 	return h.done
 }
-
-// ---------------------------------------------------------------------------
-// Internal helpers
-// ---------------------------------------------------------------------------
-
-// subagentJSONOutput matches the JSON envelope produced by `kit --json`.
-type subagentJSONOutput struct {
-	Response   string `json:"response"`
-	StopReason string `json:"stop_reason,omitempty"`
-	SessionID  string `json:"session_id,omitempty"`
-	Usage      *struct {
-		InputTokens  int64 `json:"input_tokens"`
-		OutputTokens int64 `json:"output_tokens"`
-	} `json:"usage,omitempty"`
-}
-
-var subagentCounter atomic.Uint64
-
-func generateSubagentID() string {
-	n := subagentCounter.Add(1)
-	return fmt.Sprintf("sub-%d-%d", time.Now().UnixNano(), n)
-}
-
-func findKitBinary() string {
-	// Try the current process executable first.
-	if exe, err := os.Executable(); err == nil {
-		if _, err := os.Stat(exe); err == nil {
-			return exe
-		}
-	}
-	// Fall back to PATH lookup.
-	if p, err := exec.LookPath("kit"); err == nil {
-		return p
-	}
-	return "kit"
-}
-
-// ---------------------------------------------------------------------------
-// SpawnSubagent implementation
-// ---------------------------------------------------------------------------
-
-// SpawnSubagent spawns a child Kit instance to perform a task.
-//
-// When config.Blocking is true, blocks until completion and returns the result
-// directly (handle is nil). When false, returns immediately with a handle for
-// monitoring/cancellation.
-//
-// The subagent runs with --json --no-session --no-extensions flags by default,
-// ensuring isolation from the parent's extensions and session state.
-func SpawnSubagent(cfg SubagentConfig) (*SubagentHandle, *SubagentResult, error) {
-	if cfg.Prompt == "" {
-		return nil, nil, fmt.Errorf("prompt is required")
-	}
-
-	timeout := cfg.Timeout
-	if timeout == 0 {
-		timeout = 5 * time.Minute
-	}
-
-	kitBinary := findKitBinary()
-
-	// Build subprocess arguments.
-	args := []string{
-		"--json",
-		"--no-extensions",
-	}
-	if cfg.NoSession {
-		args = append(args, "--no-session")
-	}
-	if cfg.Model != "" {
-		args = append(args, "--model", cfg.Model)
-	}
-
-	// Handle system prompt - write to temp file if provided.
-	var tmpFile *os.File
-	if cfg.SystemPrompt != "" {
-		var err error
-		tmpFile, err = os.CreateTemp("", "kit-subagent-*.txt")
-		if err != nil {
-			return nil, nil, fmt.Errorf("create temp file: %w", err)
-		}
-		if _, err := tmpFile.WriteString(cfg.SystemPrompt); err != nil {
-			_ = tmpFile.Close()
-			_ = os.Remove(tmpFile.Name())
-			return nil, nil, fmt.Errorf("write system prompt: %w", err)
-		}
-		_ = tmpFile.Close()
-		args = append(args, "--system-prompt", tmpFile.Name())
-	}
-
-	// Add the prompt as a positional argument.
-	args = append(args, cfg.Prompt)
-
-	// Create command with timeout context.
-	ctx, cancel := context.WithTimeout(context.Background(), timeout)
-
-	cmd := exec.CommandContext(ctx, kitBinary, args...)
-	cmd.Env = os.Environ()
-
-	stdout, err := cmd.StdoutPipe()
-	if err != nil {
-		cancel()
-		if tmpFile != nil {
-			_ = os.Remove(tmpFile.Name())
-		}
-		return nil, nil, fmt.Errorf("stdout pipe: %w", err)
-	}
-	stderr, err := cmd.StderrPipe()
-	if err != nil {
-		cancel()
-		if tmpFile != nil {
-			_ = os.Remove(tmpFile.Name())
-		}
-		return nil, nil, fmt.Errorf("stderr pipe: %w", err)
-	}
-
-	handle := &SubagentHandle{
-		ID:   generateSubagentID(),
-		done: make(chan struct{}),
-	}
-
-	// Start the subprocess.
-	start := time.Now()
-	if err := cmd.Start(); err != nil {
-		cancel()
-		if tmpFile != nil {
-			_ = os.Remove(tmpFile.Name())
-		}
-		return nil, nil, fmt.Errorf("start subprocess: %w", err)
-	}
-
-	handle.mu.Lock()
-	handle.proc = cmd.Process
-	handle.mu.Unlock()
-
-	// Run the subprocess monitoring in a goroutine.
-	go func() {
-		defer close(handle.done)
-		defer cancel()
-		if tmpFile != nil {
-			defer func() { _ = os.Remove(tmpFile.Name()) }()
-		}
-
-		var wg sync.WaitGroup
-		var stdoutBuf strings.Builder
-
-		// Read stderr (live output).
-		wg.Go(func() {
-			scanner := bufio.NewScanner(stderr)
-			scanner.Buffer(make([]byte, 256*1024), 256*1024)
-			for scanner.Scan() {
-				line := scanner.Text()
-				if cfg.OnOutput != nil && strings.TrimSpace(line) != "" {
-					cfg.OnOutput(line + "\n")
-				}
-			}
-		})
-
-		// Read stdout (JSON output).
-		scanner := bufio.NewScanner(stdout)
-		scanner.Buffer(make([]byte, 256*1024), 256*1024)
-		for scanner.Scan() {
-			stdoutBuf.WriteString(scanner.Text() + "\n")
-		}
-
-		wg.Wait()
-		waitErr := cmd.Wait()
-		elapsed := time.Since(start)
-
-		// Build result.
-		result := SubagentResult{Elapsed: elapsed}
-		if waitErr != nil {
-			result.Error = waitErr
-			if exitErr, ok := waitErr.(*exec.ExitError); ok {
-				result.ExitCode = exitErr.ExitCode()
-			} else {
-				result.ExitCode = 1
-			}
-		}
-
-		// Parse JSON output.
-		raw := strings.TrimSpace(stdoutBuf.String())
-		var parsed subagentJSONOutput
-		if raw != "" && json.Unmarshal([]byte(raw), &parsed) == nil {
-			result.Response = parsed.Response
-			result.SessionID = parsed.SessionID
-			if parsed.Usage != nil {
-				result.Usage = &SubagentUsage{
-					InputTokens:  parsed.Usage.InputTokens,
-					OutputTokens: parsed.Usage.OutputTokens,
-				}
-			}
-		} else {
-			// Fallback: use raw stdout.
-			result.Response = raw
-		}
-
-		handle.mu.Lock()
-		handle.result = &result
-		handle.proc = nil
-		handle.mu.Unlock()
-
-		if cfg.OnComplete != nil {
-			cfg.OnComplete(result)
-		}
-	}()
-
-	if cfg.Blocking {
-		// Wait for completion and return result directly.
-		<-handle.done
-		handle.mu.Lock()
-		r := handle.result
-		handle.mu.Unlock()
-		return nil, r, nil
-	}
-
-	return handle, nil, nil
-}
@@ -28,6 +28,11 @@ func Symbols() interp.Exports {
 			"CommandDef":     reflect.ValueOf((*CommandDef)(nil)),
 			"PrintBlockOpts": reflect.ValueOf((*PrintBlockOpts)(nil)),

+			// Sentinel errors. Extensions detect them with errors.Is:
+			//
+			//   if errors.Is(err, ext.ErrAgentBusy) { ... }
+			"ErrAgentBusy": reflect.ValueOf(&ErrAgentBusy).Elem(),
+
 			// Session types
 			"SessionMessage": reflect.ValueOf((*SessionMessage)(nil)),
 			"ExtensionEntry": reflect.ValueOf((*ExtensionEntry)(nil)),
@@ -152,6 +157,9 @@ func Symbols() interp.Exports {
 			// Event structs
 			"ToolCallEvent":           reflect.ValueOf((*ToolCallEvent)(nil)),
 			"ToolCallResult":          reflect.ValueOf((*ToolCallResult)(nil)),
+			"ToolCallInputStartEvent": reflect.ValueOf((*ToolCallInputStartEvent)(nil)),
+			"ToolCallInputDeltaEvent": reflect.ValueOf((*ToolCallInputDeltaEvent)(nil)),
+			"ToolCallInputEndEvent":   reflect.ValueOf((*ToolCallInputEndEvent)(nil)),
 			"ToolExecutionStartEvent": reflect.ValueOf((*ToolExecutionStartEvent)(nil)),
 			"ToolExecutionEndEvent":   reflect.ValueOf((*ToolExecutionEndEvent)(nil)),
 			"ToolOutputEvent":         reflect.ValueOf((*ToolOutputEvent)(nil)),
@@ -169,6 +177,18 @@ func Symbols() interp.Exports {
 			"SessionStartEvent":       reflect.ValueOf((*SessionStartEvent)(nil)),
 			"SessionShutdownEvent":    reflect.ValueOf((*SessionShutdownEvent)(nil)),
 			"ModelChangeEvent":        reflect.ValueOf((*ModelChangeEvent)(nil)),
+
+			// Step lifecycle events
+			"StepStartEvent":      reflect.ValueOf((*StepStartEvent)(nil)),
+			"StepFinishEvent":     reflect.ValueOf((*StepFinishEvent)(nil)),
+			"ReasoningStartEvent": reflect.ValueOf((*ReasoningStartEvent)(nil)),
+			"WarningsEvent":       reflect.ValueOf((*WarningsEvent)(nil)),
+			"SourceEvent":         reflect.ValueOf((*SourceEvent)(nil)),
+			"ErrorEvent":          reflect.ValueOf((*ErrorEvent)(nil)),
+			"RetryEvent":          reflect.ValueOf((*RetryEvent)(nil)),
+			"PrepareStepEvent":    reflect.ValueOf((*PrepareStepEvent)(nil)),
+			"PrepareStepResult":   reflect.ValueOf((*PrepareStepResult)(nil)),
+			"LLMUsageEvent":       reflect.ValueOf((*LLMUsageEvent)(nil)),
 		},
 	}
 }
@@ -189,5 +189,11 @@ func NewTestAPI(ext *LoadedExtension) API {
 				return nil
 			})
 		},
+		onLLMUsage: func(h func(LLMUsageEvent, Context)) {
+			reg(LLMUsage, func(e Event, c Context) Result {
+				h(e.(LLMUsageEvent), c)
+				return nil
+			})
+		},
 	}
 }
@@ -0,0 +1,38 @@
+package extensions
+
+// ToolKind constants classify what a tool does, so that UIs can render a
+// fitting visualization (e.g. diff view for edit tools, command+output for
+// execute tools) and file trackers can identify which results contain
+// modifications. They are untyped string constants.
+//
+// This is the single source of truth for tool-kind classification; the
+// pkg/kit SDK re-exports these constants.
+const (
+	ToolKindExecute  = "execute" // Shell execution (bash); also ToolKindFor's fallback
+	ToolKindEdit     = "edit"    // File modification (edit, write)
+	ToolKindRead     = "read"    // File reading (read, ls)
+	ToolKindSearch   = "search"  // Content/file search (grep, find)
+	ToolKindSubagent = "agent"   // Subagent spawning (subagent); note the value is "agent"
+)
+
+// coreToolKinds maps each built-in tool name to its ToolKind constant.
+// Names without an entry (MCP and extension tools) get ToolKindExecute from ToolKindFor.
+var coreToolKinds = map[string]string{
+	"bash":     ToolKindExecute,
+	"edit":     ToolKindEdit,
+	"write":    ToolKindEdit,
+	"read":     ToolKindRead,
+	"ls":       ToolKindRead,
+	"grep":     ToolKindSearch,
+	"find":     ToolKindSearch,
+	"subagent": ToolKindSubagent,
+}
+
+// ToolKindFor returns the kind registered for toolName in coreToolKinds, or
+// ToolKindExecute when the name has no entry (unknown tools, including MCP tools).
+func ToolKindFor(toolName string) string {
+	if kind, ok := coreToolKinds[toolName]; ok {
+		return kind
+	}
+	return ToolKindExecute
+}
@@ -1,143 +1,32 @@
 package extensions

 import (
-	"context"
-	"fmt"
 	"os"
 	"path/filepath"
-	"strings"
-	"sync"
-	"time"

-	"github.com/charmbracelet/log"
-	"github.com/fsnotify/fsnotify"
+	"github.com/mark3labs/kit/internal/watcher"
 )

-// Watcher monitors extension directories for file changes and triggers
-// a reload callback when .go files are created, modified, or removed.
-// It uses fsnotify for kernel-level file notifications (inotify on Linux,
-// kqueue on macOS) with debouncing to coalesce rapid editor writes.
-type Watcher struct {
-	watcher  *fsnotify.Watcher
-	onReload func()
-	debounce time.Duration
-	cancel   context.CancelFunc
-	done     chan struct{}
-	mu       sync.Mutex
-}
+// Watcher monitors extension directories for .go file changes and triggers
+// a reload callback when changes are detected. It is implemented in terms
+// of the general-purpose internal/watcher.ContentWatcher.
+//
+// Type-aliasing here lets existing call sites (cmd/root.go and the
+// watcher_test.go suite) keep using `extensions.NewWatcher` / `*Watcher`
+// without knowing about the underlying implementation.
+type Watcher = watcher.ContentWatcher

 // NewWatcher creates a file watcher that monitors the given directories
 // for .go file changes. When a change is detected (after debouncing),
 // onReload is called. The watcher must be started with Start() and
 // stopped with Close().
 func NewWatcher(dirs []string, onReload func()) (*Watcher, error) {
-	fsw, err := fsnotify.NewWatcher()
-	if err != nil {
-		return nil, fmt.Errorf("creating file watcher: %w", err)
-	}
-
-	for _, dir := range dirs {
-		// Watch the directory itself.
-		if err := fsw.Add(dir); err != nil {
-			log.Debug("watcher: skipping directory", "dir", dir, "err", err)
-			continue
-		}
-
-		// Also watch immediate subdirectories (for */main.go pattern).
-		entries, err := os.ReadDir(dir)
-		if err != nil {
-			continue
-		}
-		for _, entry := range entries {
-			if entry.IsDir() {
-				subdir := filepath.Join(dir, entry.Name())
-				if err := fsw.Add(subdir); err != nil {
-					log.Debug("watcher: skipping subdirectory", "dir", subdir, "err", err)
-				}
-			}
-		}
-	}
-
-	return &Watcher{
-		watcher:  fsw,
-		onReload: onReload,
-		debounce: 300 * time.Millisecond,
-		done:     make(chan struct{}),
-	}, nil
-}
-
-// Start begins watching for file changes. It blocks until the context
-// is cancelled or Close() is called. Typically called in a goroutine.
-func (w *Watcher) Start(ctx context.Context) {
-	w.mu.Lock()
-	ctx, w.cancel = context.WithCancel(ctx)
-	w.mu.Unlock()
-
-	defer close(w.done)
-
-	var timer *time.Timer
-	var timerC <-chan time.Time
-
-	for {
-		select {
-		case <-ctx.Done():
-			if timer != nil {
-				timer.Stop()
-			}
-			return
-
-		case event, ok := <-w.watcher.Events:
-			if !ok {
-				return
-			}
-
-			// Only care about .go files.
-			if !strings.HasSuffix(event.Name, ".go") {
-				continue
-			}
-
-			// React to write, create, remove, rename events.
-			if event.Op&(fsnotify.Write|fsnotify.Create|fsnotify.Remove|fsnotify.Rename) == 0 {
-				continue
-			}
-
-			log.Debug("watcher: file changed", "file", event.Name, "op", event.Op)
-
-			// Debounce: reset timer on each event.
-			if timer != nil {
-				timer.Stop()
-			}
-			timer = time.NewTimer(w.debounce)
-			timerC = timer.C
-
-		case <-timerC:
-			timerC = nil
-			timer = nil
-			log.Debug("watcher: reloading extensions")
-			w.onReload()
-
-		case err, ok := <-w.watcher.Errors:
-			if !ok {
-				return
-			}
-			log.Warn("watcher: error", "err", err)
-		}
-	}
-}
-
-// Close stops the watcher and releases resources.
-func (w *Watcher) Close() error {
-	w.mu.Lock()
-	cancel := w.cancel
-	w.mu.Unlock()
-
-	if cancel != nil {
-		cancel()
-	}
-
-	// Wait for the event loop to finish.
-	<-w.done
-	return w.watcher.Close()
+	return watcher.New(watcher.Options{
+		Dirs:       dirs,
+		Extensions: []string{".go"},
+		OnReload:   onReload,
+		Label:      "extensions",
+	})
 }

 // WatchedDirs returns the directories to watch for extension changes.
@@ -146,47 +35,25 @@ func (w *Watcher) Close() error {
 // point to directories are also included; explicit file paths cause
 // their parent directory to be watched instead.
 func WatchedDirs(extraPaths []string) []string {
-	var dirs []string
-	seen := make(map[string]bool)
-
-	add := func(dir string) {
-		abs, err := filepath.Abs(dir)
-		if err != nil {
-			return
-		}
-		if seen[abs] {
-			return
-		}
-
-		// Verify the directory exists.
-		info, err := os.Stat(abs)
-		if err != nil || !info.IsDir() {
-			return
-		}
-
-		seen[abs] = true
-		dirs = append(dirs, abs)
+	standard := []string{
+		globalExtensionsDir(),
+		filepath.Join(".kit", "extensions"),
 	}

-	// Global extensions dir.
-	add(globalExtensionsDir())
-
-	// Project-local extensions dir.
-	add(filepath.Join(".kit", "extensions"))
-
-	// Explicit paths that are directories.
+	// Filter explicit paths into directories (passed through) and files
+	// (parent dir watched) for CollectDirs to dedupe.
+	var extras []string
 	for _, p := range extraPaths {
 		info, err := os.Stat(p)
 		if err != nil {
 			continue
 		}
 		if info.IsDir() {
-			add(p)
+			extras = append(extras, p)
 		} else {
-			// For explicit files, watch the parent directory.
-			add(filepath.Dir(p))
+			extras = append(extras, filepath.Dir(p))
 		}
 	}

-	return dirs
+	return watcher.CollectDirs(standard, extras)
 }
@@ -28,11 +28,11 @@ func WrapToolsWithExtensions(tools []fantasy.AgentTool, runner *Runner) []fantas
 	return wrapped
 }

-// ExtensionToolsAsFantasy converts ToolDef values registered by extensions
-// into fantasy.AgentTool implementations so the LLM can invoke them.
+// ExtensionToolsAsLLMTools converts ToolDef values registered by extensions
+// into LLM agent tool implementations so the LLM can invoke them.
 // The runner is optional; if provided, ToolContext.OnProgress routes
 // progress messages through the runner's Print function.
-func ExtensionToolsAsFantasy(defs []ToolDef, runner *Runner) []fantasy.AgentTool {
+func ExtensionToolsAsLLMTools(defs []ToolDef, runner *Runner) []fantasy.AgentTool {
 	tools := make([]fantasy.AgentTool, 0, len(defs))
 	for _, def := range defs {
 		tools = append(tools, &extensionTool{def: def, runner: runner})
@@ -40,27 +40,6 @@ func ExtensionToolsAsFantasy(defs []ToolDef, runner *Runner) []fantasy.AgentTool
 	return tools
 }

-// coreToolKinds maps built-in tool names to their kind classification.
-var coreToolKinds = map[string]string{
-	"bash":     "execute",
-	"edit":     "edit",
-	"write":    "edit",
-	"read":     "read",
-	"ls":       "read",
-	"grep":     "search",
-	"find":     "search",
-	"subagent": "agent",
-}
-
-// toolKindFor returns the ToolKind for a given tool name, defaulting to
-// "execute" for unknown tools (including MCP tools).
-func toolKindFor(toolName string) string {
-	if kind, ok := coreToolKinds[toolName]; ok {
-		return kind
-	}
-	return "execute"
-}
-
 // parseToolArgsJSON attempts to parse JSON-encoded tool args into a map.
 // Returns nil on failure (non-fatal convenience parsing).
 func parseToolArgsJSON(input string) map[string]any {
@@ -90,11 +69,10 @@ func (w *wrappedTool) Run(ctx context.Context, call fantasy.ToolCall) (fantasy.T
 	// 0. Check if tool is disabled via SetActiveTools.
 	if w.runner.IsToolDisabled(toolName) {
 		return fantasy.NewTextErrorResponse(
-				fmt.Sprintf("Error: tool %q is currently disabled", toolName)),
-			fmt.Errorf("tool %q disabled by extension", toolName)
+			fmt.Sprintf("Error: tool %q is currently disabled", toolName)), nil
 	}

-	kind := toolKindFor(toolName)
+	kind := ToolKindFor(toolName)

 	// 1. Emit ToolCall — extensions can block execution.
 	if w.runner.HasHandlers(ToolCall) {
@@ -111,8 +89,7 @@ func (w *wrappedTool) Run(ctx context.Context, call fantasy.ToolCall) (fantasy.T
 			if reason == "" {
 				reason = "blocked by extension"
 			}
-			return fantasy.NewTextErrorResponse(fmt.Sprintf("Error: %s", reason)),
-				fmt.Errorf("tool blocked by extension: %s", reason)
+			return fantasy.NewTextErrorResponse(fmt.Sprintf("Error: %s", reason)), nil
 		}
 	}

@@ -154,7 +131,7 @@ func (w *wrappedTool) Run(ctx context.Context, call fantasy.ToolCall) (fantasy.T
 }

 // ---------------------------------------------------------------------------
-// extensionTool — wraps a ToolDef into a fantasy.AgentTool
+// extensionTool — wraps a ToolDef into an LLM agent tool
 // ---------------------------------------------------------------------------

 type extensionTool struct {
@@ -182,7 +159,7 @@ func (t *extensionTool) Info() fantasy.ToolInfo {
 				info.Parameters = props
 			} else {
 				// Schema doesn't have "properties" — use as-is (may be
-				// a flat property map already matching fantasy's format).
+				// a flat property map already matching the expected format).
 				info.Parameters = schema
 			}
 			// Extract required fields if present.
@@ -238,7 +215,7 @@ func (t *extensionTool) Run(ctx context.Context, call fantasy.ToolCall) (fantasy
 	}

 	if err != nil {
-		return fantasy.NewTextErrorResponse(err.Error()), err
+		return fantasy.NewTextErrorResponse(err.Error()), nil
 	}
 	return fantasy.NewTextResponse(result), nil
 }
@@ -142,8 +142,8 @@ func TestWrappedTool_BlockExecution(t *testing.T) {
 	if toolRan {
 		t.Error("tool should not have run after block")
 	}
-	if err == nil {
-		t.Error("expected error from blocked tool")
+	if err != nil {
+		t.Error("expected nil error for blocked tool (error is conveyed via IsError response)")
 	}
 	if resp.IsError != true {
 		t.Error("expected IsError=true from blocked response")
@@ -192,7 +192,7 @@ func TestWrappedTool_ExecutionStartEnd(t *testing.T) {
 	}
 }

-func TestExtensionToolsAsFantasy(t *testing.T) {
+func TestExtensionToolsAsLLMTools(t *testing.T) {
 	defs := []ToolDef{
 		{
 			Name:        "greet",
@@ -202,7 +202,7 @@ func TestExtensionToolsAsFantasy(t *testing.T) {
 		},
 	}

-	tools := ExtensionToolsAsFantasy(defs, nil)
+	tools := ExtensionToolsAsLLMTools(defs, nil)
 	if len(tools) != 1 {
 		t.Fatalf("expected 1 tool, got %d", len(tools))
 	}
@@ -232,10 +232,10 @@ func TestExtensionTool_Error(t *testing.T) {
 		},
 	}

-	tools := ExtensionToolsAsFantasy(defs, nil)
+	tools := ExtensionToolsAsLLMTools(defs, nil)
 	resp, err := tools[0].Run(context.Background(), fantasy.ToolCall{Input: "x"})
-	if err == nil {
-		t.Error("expected error")
+	if err != nil {
+		t.Error("expected nil error (error is conveyed via IsError response)")
 	}
 	if !resp.IsError {
 		t.Error("expected IsError=true")
@@ -259,7 +259,7 @@ func TestExtensionTool_ExecuteWithContext(t *testing.T) {
 	}

 	// Without runner, OnProgress is a no-op.
-	tools := ExtensionToolsAsFantasy(defs, nil)
+	tools := ExtensionToolsAsLLMTools(defs, nil)
 	resp, err := tools[0].Run(context.Background(), fantasy.ToolCall{Input: "test"})
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
@@ -285,7 +285,7 @@ func TestExtensionTool_ExecuteWithContext(t *testing.T) {
 			},
 		},
 	}
-	tools2 := ExtensionToolsAsFantasy(defs2, runner)
+	tools2 := ExtensionToolsAsLLMTools(defs2, runner)
 	_, err = tools2[0].Run(context.Background(), fantasy.ToolCall{Input: ""})
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
@@ -306,7 +306,7 @@ func TestExtensionTool_ExecuteWithContextPriority(t *testing.T) {
 			},
 		},
 	}
-	tools := ExtensionToolsAsFantasy(defs, nil)
+	tools := ExtensionToolsAsLLMTools(defs, nil)
 	resp, err := tools[0].Run(context.Background(), fantasy.ToolCall{Input: ""})
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
@@ -330,7 +330,7 @@ func TestExtensionTool_CancelledContext(t *testing.T) {
 			},
 		},
 	}
-	tools := ExtensionToolsAsFantasy(defs, nil)
+	tools := ExtensionToolsAsLLMTools(defs, nil)
 	_, _ = tools[0].Run(ctx, fantasy.ToolCall{Input: ""})
 	if !sawCancelled {
 		t.Error("expected IsCancelled=true for cancelled context")
@@ -339,7 +339,7 @@ func TestExtensionTool_CancelledContext(t *testing.T) {

 func TestExtensionTool_ProviderOptions(t *testing.T) {
 	defs := []ToolDef{{Name: "test", Execute: func(string) (string, error) { return "", nil }}}
-	tools := ExtensionToolsAsFantasy(defs, nil)
+	tools := ExtensionToolsAsLLMTools(defs, nil)

 	// Initially nil.
 	opts := tools[0].ProviderOptions()
@@ -0,0 +1,248 @@
+// Package fences provides utilities for detecting markdown code regions
+// (fenced code blocks and inline code spans) and applying transformations
+// only to text outside those regions.
+//
+// This prevents special tokens like $1, $@, or @file from being interpreted
+// when they appear inside ``` fences, ~~~ fences, or `inline` code spans.
+package fences
+
+import "strings"
+
+// Ranges returns byte ranges [start, end) of fenced code blocks in content.
+// Each range runs from the opening fence line through the closing fence line
+// (newline included); an unclosed fence extends to the end of content. Backtick
+// and tilde fences are recognised, indented up to 3 spaces, info strings allowed.
+func Ranges(content string) [][2]int {
+	var result [][2]int
+	var inFence bool
+	var fenceChar byte // '`' or '~' of the fence currently open
+	var fenceCount int // length of the opening run; a closer needs at least this many
+	var fenceStart int // byte offset of the line that opened the fence
+
+	pos := 0
+	for pos < len(content) {
+		// Find the end of the current line; lines are split on '\n' only.
+		lineEnd := strings.IndexByte(content[pos:], '\n')
+		var line string
+		var nextPos int
+		if lineEnd < 0 {
+			line = content[pos:]
+			nextPos = len(content)
+		} else {
+			line = content[pos : pos+lineEnd]
+			nextPos = pos + lineEnd + 1
+		}
+
+		trimmed := strings.TrimLeft(line, " ") // only spaces count as indentation here
+		indent := len(line) - len(trimmed)
+
+		if !inFence {
+			if indent <= 3 {
+				if ch, n := parseFenceOpen(trimmed); n > 0 {
+					inFence = true
+					fenceChar = ch
+					fenceCount = n
+					fenceStart = pos
+				}
+			}
+		} else {
+			if indent <= 3 && isFenceClose(trimmed, fenceChar, fenceCount) {
+				result = append(result, [2]int{fenceStart, nextPos}) // end is just past the closing line
+				inFence = false
+			}
+		}
+
+		pos = nextPos
+	}
+
+	// A fence still open at EOF covers everything from its opening line onward.
+	if inFence {
+		result = append(result, [2]int{fenceStart, len(content)})
+	}
+
+	return result
+}
+
+// ReplaceOutside returns content with fn applied to every text segment that
+// lies outside fenced code blocks and inline code spans; code is copied through
+// unchanged. fn is called once per non-code segment, in document order. This is
+// the primary entry point for regex replacement restricted to non-code text.
+func ReplaceOutside(content string, fn func(string) string) string {
+	ranges := Ranges(content)
+	if len(ranges) == 0 {
+		return replaceOutsideInline(content, fn)
+	}
+
+	var b strings.Builder
+	b.Grow(len(content))
+	pos := 0 // start of the text not yet written to b
+	for _, r := range ranges {
+		if pos < r[0] {
+			// Text before this fence: inline code spans inside it are skipped too.
+			b.WriteString(replaceOutsideInline(content[pos:r[0]], fn))
+		}
+		// Copy the fenced block itself verbatim; fn never sees it.
+		b.WriteString(content[r[0]:r[1]])
+		pos = r[1]
+	}
+	if pos < len(content) {
+		b.WriteString(replaceOutsideInline(content[pos:], fn))
+	}
+	return b.String()
+}
+
+// StripCode returns content with fenced code blocks and inline code spans
+// removed, keeping exactly the text that ReplaceOutside would pass to fn.
+func StripCode(content string) string {
+	var kept strings.Builder
+	// Collect per-segment so a backtick before a fence never pairs with one after it.
+	_ = ReplaceOutside(content, func(s string) string { kept.WriteString(s); return s })
+	return kept.String()
+}
+
+// StripFenced returns content with every range reported by Ranges cut out.
+// Useful for detection/matching where only non-fenced text matters.
+// NOTE: this does NOT strip inline code spans; use StripCode for both.
+func StripFenced(content string) string {
+	ranges := Ranges(content)
+	if len(ranges) == 0 {
+		return content
+	}
+
+	var b strings.Builder
+	b.Grow(len(content))
+	pos := 0 // start of the next non-fenced stretch
+	for _, r := range ranges {
+		b.WriteString(content[pos:r[0]])
+		pos = r[1]
+	}
+	b.WriteString(content[pos:]) // tail after the last fence (may be empty)
+	return b.String()
+}
+
+// parseFenceOpen checks whether trimmed (leading spaces already removed)
+// starts a fenced code block. It returns the fence character and the length
+// of the opening run (at least 3), or (0, 0) if trimmed is not a fence opener.
+func parseFenceOpen(trimmed string) (byte, int) {
+	if len(trimmed) == 0 {
+		return 0, 0
+	}
+	ch := trimmed[0]
+	if ch != '`' && ch != '~' {
+		return 0, 0
+	}
+	count := 0
+	for count < len(trimmed) && trimmed[count] == ch {
+		count++
+	}
+	if count < 3 {
+		return 0, 0
+	}
+	// Per CommonMark: backtick fences cannot have backticks in the info string.
+	if ch == '`' && strings.ContainsRune(trimmed[count:], '`') {
+		return 0, 0
+	}
+	return ch, count
+}
+
+// isFenceClose checks whether trimmed is a closing fence matching fenceChar
+// with at least minCount characters. A closing fence line contains only the
+// fence characters and optional trailing whitespace (spaces, tabs, or a CR).
+func isFenceClose(trimmed string, fenceChar byte, minCount int) bool {
+	if len(trimmed) == 0 || trimmed[0] != fenceChar {
+		return false
+	}
+	count := 0
+	for count < len(trimmed) && trimmed[count] == fenceChar {
+		count++
+	}
+	if count < minCount {
+		return false
+	}
+	// Allow trailing spaces/tabs (CommonMark) and the '\r' left by a CRLF line ending.
+	return strings.TrimRight(trimmed[count:], " \t\r") == ""
+}
+
+// --------------------------------------------------------------------------
+// Inline code span handling
+// --------------------------------------------------------------------------
+
+// inlineCodeRanges returns byte ranges [start, end) of inline code spans
+// in s, delimiters included. Per CommonMark, a code span opens with N
+// backticks and closes with the next run of exactly N backticks.
+func inlineCodeRanges(s string) [][2]int {
+	var result [][2]int
+	i := 0
+	for i < len(s) {
+		if s[i] != '`' {
+			i++
+			continue
+		}
+		// Count opening backticks.
+		start := i
+		n := 0
+		for i < len(s) && s[i] == '`' {
+			n++
+			i++
+		}
+		// Scan for a closing run of exactly n backticks; other run lengths are span content.
+		for j := i; j < len(s); {
+			if s[j] != '`' {
+				j++
+				continue
+			}
+			m := 0
+			for j < len(s) && s[j] == '`' {
+				m++
+				j++
+			}
+			if m == n {
+				result = append(result, [2]int{start, j})
+				i = j // resume the outer scan just past the closing run
+				break
+			}
+		}
+		// If no closing run was found, i is already past the opening
+		// backticks, so the outer loop simply continues from there.
+	}
+	return result
+}
+
+// replaceOutsideInline applies fn to the text of segment outside inline code spans and copies the spans unchanged.
+func replaceOutsideInline(segment string, fn func(string) string) string {
+	ranges := inlineCodeRanges(segment)
+	if len(ranges) == 0 {
+		return fn(segment) // no spans: fn sees the whole segment, even when it is empty
+	}
+	var b strings.Builder
+	b.Grow(len(segment))
+	pos := 0 // start of the text not yet written to b
+	for _, r := range ranges {
+		if pos < r[0] {
+			b.WriteString(fn(segment[pos:r[0]]))
+		}
+		b.WriteString(segment[r[0]:r[1]]) // the code span, backticks included, verbatim
+		pos = r[1]
+	}
+	if pos < len(segment) {
+		b.WriteString(fn(segment[pos:]))
+	}
+	return b.String()
+}
+
+// stripInlineCode returns s with every inline code span (backticks included) removed.
+func stripInlineCode(s string) string {
+	ranges := inlineCodeRanges(s)
+	if len(ranges) == 0 {
+		return s
+	}
+	var b strings.Builder
+	b.Grow(len(s))
+	pos := 0 // start of the next stretch to keep
+	for _, r := range ranges {
+		b.WriteString(s[pos:r[0]])
+		pos = r[1]
+	}
+	b.WriteString(s[pos:]) // tail after the last span (may be empty)
+	return b.String()
+}
@@ -0,0 +1,313 @@
+package fences
+
+import (
+	"testing"
+)
+
+func TestRanges(t *testing.T) {
+	tests := []struct {
+		name    string
+		content string
+		want    [][2]int // expected [start, end) byte ranges; nil means no fence found
+	}{
+		{
+			name:    "no fences",
+			content: "hello world\nno code here",
+			want:    nil,
+		},
+		{
+			name:    "single backtick fence",
+			content: "before\n```\ncode\n```\nafter",
+			want:    [][2]int{{7, 20}},
+		},
+		{
+			name:    "single tilde fence",
+			content: "before\n~~~\ncode\n~~~\nafter",
+			want:    [][2]int{{7, 20}},
+		},
+		{
+			name:    "fence with info string",
+			content: "before\n```go\ncode\n```\nafter",
+			want:    [][2]int{{7, 22}},
+		},
+		{
+			name:    "multiple fences",
+			content: "a\n```\nx\n```\nb\n~~~\ny\n~~~\nc",
+			want:    [][2]int{{2, 12}, {14, 24}},
+		},
+		{
+			name:    "unclosed fence",
+			content: "before\n```\ncode\nmore code",
+			want:    [][2]int{{7, 25}},
+		},
+		{
+			name:    "longer closing fence",
+			content: "before\n```\ncode\n`````\nafter",
+			want:    [][2]int{{7, 22}},
+		},
+		{
+			name:    "shorter closing fence ignored",
+			content: "before\n`````\ncode\n```\nmore\n`````\nafter",
+			want:    [][2]int{{7, 33}},
+		},
+		{
+			name:    "indented fence up to 3 spaces",
+			content: "before\n   ```\ncode\n   ```\nafter",
+			want:    [][2]int{{7, 26}},
+		},
+		{
+			name:    "4 space indent is not a fence",
+			content: "before\n    ```\ncode\n    ```\nafter",
+			want:    nil,
+		},
+		{
+			name: "backtick in info string rejects open",
+			// The ```foo`bar line is not a valid opener (backtick in info).
+			// The later bare ``` (offset 23) then opens a fence that never closes, so it runs to len(content) = 32.
+			content: "before\n```foo`bar\ncode\n```\nafter",
+			want:    [][2]int{{23, 32}},
+		},
+		{
+			name:    "empty content",
+			content: "",
+			want:    nil,
+		},
+		{
+			name:    "fence only",
+			content: "```\ncode\n```",
+			want:    [][2]int{{0, 12}},
+		},
+		{
+			name:    "fence at end without trailing newline", // NOTE(review): same input as "fence only" above
+			content: "```\ncode\n```",
+			want:    [][2]int{{0, 12}},
+		},
+		{
+			name:    "tilde fence does not close with backticks",
+			content: "~~~\ncode\n```\nmore\n~~~\nafter",
+			want:    [][2]int{{0, 22}},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := Ranges(tt.content)
+			if len(got) != len(tt.want) { // a length mismatch would make the element loop below unsafe
+				t.Fatalf("Ranges() = %v, want %v", got, tt.want)
+			}
+			for i := range got {
+				if got[i] != tt.want[i] {
+					t.Errorf("Ranges()[%d] = %v, want %v", i, got[i], tt.want[i])
+				}
+			}
+		})
+	}
+}
+
+func TestReplaceOutside(t *testing.T) {
+	upper := func(s string) string { // ASCII-only uppercasing, used as the replacement fn
+		b := []byte(s)
+		for i, c := range b {
+			if c >= 'a' && c <= 'z' {
+				b[i] = c - 32 // 'a' - 'A' == 32
+			}
+		}
+		return string(b)
+	}
+
+	tests := []struct {
+		name    string
+		content string
+		want    string // text outside fences uppercased, fenced text untouched
+	}{
+		{
+			name:    "no fences",
+			content: "hello world",
+			want:    "HELLO WORLD",
+		},
+		{
+			name:    "text around fence",
+			content: "before\n```\ncode\n```\nafter",
+			want:    "BEFORE\n```\ncode\n```\nAFTER",
+		},
+		{
+			name:    "multiple fences",
+			content: "aaa\n```\nxxx\n```\nbbb\n~~~\nyyy\n~~~\nccc",
+			want:    "AAA\n```\nxxx\n```\nBBB\n~~~\nyyy\n~~~\nCCC",
+		},
+		{
+			name:    "unclosed fence preserves code",
+			content: "before\n```\ncode",
+			want:    "BEFORE\n```\ncode",
+		},
+		{
+			name:    "only fenced content",
+			content: "```\ncode\n```",
+			want:    "```\ncode\n```",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := ReplaceOutside(tt.content, upper)
+			if got != tt.want {
+				t.Errorf("ReplaceOutside() =\n%s\nwant:\n%s", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestStripFenced(t *testing.T) {
+	tests := []struct {
+		name    string
+		content string
+		want    string // content with the fenced ranges cut out
+	}{
+		{
+			name:    "no fences",
+			content: "hello $1 world",
+			want:    "hello $1 world",
+		},
+		{
+			name:    "strips fenced code",
+			content: "before $1\n```\n$2 inside\n```\nafter $3",
+			want:    "before $1\nafter $3",
+		},
+		{
+			name:    "multiple fences",
+			content: "a\n```\nx\n```\nb\n~~~\ny\n~~~\nc",
+			want:    "a\nb\nc",
+		},
+		{
+			name:    "unclosed fence",
+			content: "before\n```\n$1 inside",
+			want:    "before\n", // everything from the opening fence onward is dropped
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := StripFenced(tt.content)
+			if got != tt.want {
+				t.Errorf("StripFenced() = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestInlineCodeRanges(t *testing.T) {
+	tests := []struct {
+		name string
+		s    string
+		want [][2]int // expected [start, end) spans, backtick delimiters included
+	}{
+		{"no backticks", "hello world", nil},
+		{"single backtick span", "use `$1` here", [][2]int{{4, 8}}},
+		{"double backtick span", "use ``$1`` here", [][2]int{{4, 10}}},
+		{"multiple spans", "`$1` and `$2`", [][2]int{{0, 4}, {9, 13}}},
+		{"unmatched backtick", "use `$1 here", nil},
+		{"mismatched backtick counts", "use ``$1` here", nil},
+		{"empty inline content", "use `` `` here", [][2]int{{4, 9}}}, // a double-backtick span holding one space
+		{"backticks inside double", "use ``foo`bar`` here", [][2]int{{4, 15}}},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := inlineCodeRanges(tt.s)
+			if len(got) != len(tt.want) { // a length mismatch would make the element loop below unsafe
+				t.Fatalf("inlineCodeRanges() = %v, want %v", got, tt.want)
+			}
+			for i := range got {
+				if got[i] != tt.want[i] {
+					t.Errorf("inlineCodeRanges()[%d] = %v, want %v", i, got[i], tt.want[i])
+				}
+			}
+		})
+	}
+}
+
+func TestReplaceOutside_InlineCode(t *testing.T) {
+	upper := func(s string) string { // ASCII-only uppercasing, used as the replacement fn
+		b := []byte(s)
+		for i, c := range b {
+			if c >= 'a' && c <= 'z' {
+				b[i] = c - 32 // 'a' - 'A' == 32
+			}
+		}
+		return string(b)
+	}
+
+	tests := []struct {
+		name    string
+		content string
+		want    string // text outside code uppercased, inline spans and fences untouched
+	}{
+		{
+			name:    "inline code preserved",
+			content: "use `code` here",
+			want:    "USE `code` HERE",
+		},
+		{
+			name:    "double backtick inline code",
+			content: "use ``co`de`` here",
+			want:    "USE ``co`de`` HERE",
+		},
+		{
+			name:    "mixed fenced and inline",
+			content: "before `x` mid\n```\nfenced\n```\nafter `y` end",
+			want:    "BEFORE `x` MID\n```\nfenced\n```\nAFTER `y` END",
+		},
+		{
+			name:    "only inline code",
+			content: "`code`",
+			want:    "`code`",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := ReplaceOutside(tt.content, upper)
+			if got != tt.want {
+				t.Errorf("ReplaceOutside() =\n%s\nwant:\n%s", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestStripCode(t *testing.T) {
+	tests := []struct {
+		name    string
+		content string
+		want    string // content with fenced blocks and inline spans removed
+	}{
+		{
+			name:    "no code",
+			content: "hello $1 world",
+			want:    "hello $1 world",
+		},
+		{
+			name:    "strips inline code",
+			content: "use `$1` and `$2` for positional args",
+			want:    "use  and  for positional args", // surrounding spaces are kept, hence the doubles
+		},
+		{
+			name:    "strips fenced and inline",
+			content: "before `$1`\n```\n$2 inside\n```\nafter",
+			want:    "before \nafter",
+		},
+		{
+			name:    "real world prompt template",
+			content: "Use $@ for all args.\n`$1`, `$2` for positional.\n```bash\necho $1\n```\n",
+			want:    "Use $@ for all args.\n,  for positional.\n",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := StripCode(tt.content)
+			if got != tt.want {
+				t.Errorf("StripCode() = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
@@ -33,6 +33,10 @@ type AgentSetupOptions struct {
 	// CoreTools overrides the default core tool set. If empty, core.AllTools()
 	// is used. Allows SDK users to pass custom tools (e.g. with WithWorkDir).
 	CoreTools []fantasy.AgentTool
+	// DisableCoreTools, when true, prevents loading any core tools.
+	// If DisableCoreTools is true and CoreTools is also empty, the agent
+	// will have no tools (useful for simple chat completions).
+	DisableCoreTools bool
 	// ExtraTools are additional tools added alongside core, MCP, and extension
 	// tools. They do not replace the defaults — they extend them.
 	ExtraTools []fantasy.AgentTool
@@ -42,13 +46,18 @@ type AgentSetupOptions struct {
 	ToolWrapper func([]fantasy.AgentTool) []fantasy.AgentTool

 	// ProviderConfig, when non-nil, is used directly instead of calling
-	// BuildProviderConfig(). Callers that already hold viperInitMu can
-	// pre-build this and release the lock before calling SetupAgent, so the
-	// slow agent/MCP initialisation runs concurrently with other New() calls.
+	// BuildProviderConfig(). Callers (e.g. Kit.New) pre-build this from their
+	// per-instance config store and pass it here, so the slow agent/MCP
+	// initialisation can run without further config reads.
 	ProviderConfig *models.ProviderConfig
 	// Debug enables debug logging. When zero-value, viper is consulted.
 	// Only meaningful when ProviderConfig is also set.
 	Debug bool
+	// DebugLogger, if non-nil, is used directly as the engine/MCP debug
+	// logger — overriding the built-in SimpleDebugLogger / BufferedDebugLogger
+	// selected by Debug + UseBufferedLogger. Callers supply this when they
+	// want to route debug output into their own logging system.
+	DebugLogger tools.DebugLogger
 	// NoExtensions skips extension loading. When false, viper is consulted.
 	// Only meaningful when ProviderConfig is also set.
 	NoExtensions bool
@@ -61,6 +70,21 @@ type AgentSetupOptions struct {
 	// AuthHandler handles OAuth authorization for remote MCP servers.
 	// When set, remote transports are configured with OAuth support.
 	AuthHandler tools.MCPAuthHandler
+	// TokenStoreFactory, if non-nil, creates a custom token store for each
+	// remote MCP server's OAuth tokens. When nil, the default file-based
+	// token store is used.
+	TokenStoreFactory tools.TokenStoreFactory
+	// OnMCPServerLoaded, if non-nil, is called when each MCP server finishes
+	// loading (successfully or with error). Called from the background goroutine.
+	OnMCPServerLoaded func(serverName string, toolCount int, err error)
+	// MCPTaskConfig configures task-augmented tools/call execution. The
+	// zero value preserves historical synchronous-only behaviour.
+	MCPTaskConfig tools.MCPTaskConfig
+	// Viper is the per-instance configuration store. When set, it is used for
+	// any fallback config reads (debug, no-extensions, max-steps, stream,
+	// extension paths) and is attached to the extension runner. When nil, the
+	// process-global viper store is used.
+	Viper *viper.Viper
 }

 // AgentSetupResult bundles the created agent and any debug logger so the caller
@@ -73,38 +97,62 @@ type AgentSetupResult struct {
 	ExtRunner *extensions.Runner
 }

-// BuildProviderConfig creates a *models.ProviderConfig from the current viper
-// state. All entry points (root, script, SDK) converge through this function.
-func BuildProviderConfig() (*models.ProviderConfig, string, error) {
-	systemPrompt, err := config.LoadSystemPrompt(viper.GetString("system-prompt"))
+// BuildProviderConfig creates a *models.ProviderConfig from the supplied viper
+// store (or the process-global store when v is nil). All entry points (root,
+// script, SDK) converge through this function.
+//
+// Generation parameter pointers (Temperature, TopP, etc.) are only set when
+// the user has explicitly configured them via CLI flag, environment variable,
+// or global config file. This allows per-model defaults from modelSettings
+// and customModels to fill in unset parameters downstream.
+func BuildProviderConfig(v *viper.Viper) (*models.ProviderConfig, string, error) {
+	if v == nil {
+		v = viper.GetViper()
+	}
+	systemPrompt, err := config.LoadSystemPrompt(v.GetString("system-prompt"))
 	if err != nil {
 		return nil, "", fmt.Errorf("failed to load system prompt: %w", err)
 	}

-	temperature := float32(viper.GetFloat64("temperature"))
-	topP := float32(viper.GetFloat64("top-p"))
-	topK := int32(viper.GetInt("top-k"))
-	frequencyPenalty := float32(viper.GetFloat64("frequency-penalty"))
-	presencePenalty := float32(viper.GetFloat64("presence-penalty"))
-	numGPU := int32(viper.GetInt("num-gpu-layers"))
-	mainGPU := int32(viper.GetInt("main-gpu"))
+	numGPU := int32(v.GetInt("num-gpu-layers"))
+	mainGPU := int32(v.GetInt("main-gpu"))

 	cfg := &models.ProviderConfig{
-		ModelString:      viper.GetString("model"),
-		SystemPrompt:     systemPrompt,
-		ProviderAPIKey:   viper.GetString("provider-api-key"),
-		ProviderURL:      viper.GetString("provider-url"),
-		MaxTokens:        viper.GetInt("max-tokens"),
-		Temperature:      &temperature,
-		TopP:             &topP,
-		TopK:             &topK,
-		FrequencyPenalty: &frequencyPenalty,
-		PresencePenalty:  &presencePenalty,
-		StopSequences:    viper.GetStringSlice("stop-sequences"),
-		NumGPU:           &numGPU,
-		MainGPU:          &mainGPU,
-		TLSSkipVerify:    viper.GetBool("tls-skip-verify"),
-		ThinkingLevel:    models.ParseThinkingLevel(viper.GetString("thinking-level")),
+		ModelString:    v.GetString("model"),
+		SystemPrompt:   systemPrompt,
+		ProviderAPIKey: v.GetString("provider-api-key"),
+		ProviderURL:    v.GetString("provider-url"),
+		MaxTokens:      v.GetInt("max-tokens"),
+		StopSequences:  v.GetStringSlice("stop-sequences"),
+		NumGPU:         &numGPU,
+		MainGPU:        &mainGPU,
+		TLSSkipVerify:  v.GetBool("tls-skip-verify"),
+		ThinkingLevel:  models.ParseThinkingLevel(v.GetString("thinking-level")),
+		ConfigStore:    v,
+	}
+
+	// Only set generation parameter pointers when the user has explicitly
+	// provided a value. This leaves nil pointers for unset params, allowing
+	// per-model defaults (modelSettings / customModels params) to apply.
+	if v.IsSet("temperature") {
+		val := float32(v.GetFloat64("temperature"))
+		cfg.Temperature = &val
+	}
+	if v.IsSet("top-p") {
+		val := float32(v.GetFloat64("top-p"))
+		cfg.TopP = &val
+	}
+	if v.IsSet("top-k") {
+		val := int32(v.GetInt("top-k"))
+		cfg.TopK = &val
+	}
+	if v.IsSet("frequency-penalty") {
+		val := float32(v.GetFloat64("frequency-penalty"))
+		cfg.FrequencyPenalty = &val
+	}
+	if v.IsSet("presence-penalty") {
+		val := float32(v.GetFloat64("presence-penalty"))
+		cfg.PresencePenalty = &val
 	}

 	return cfg, systemPrompt, nil
@@ -116,14 +164,21 @@ func SetupAgent(ctx context.Context, opts AgentSetupOptions) (*AgentSetupResult,
 	var modelConfig *models.ProviderConfig
 	var systemPrompt string

+	// Resolve the config store: prefer the per-instance store, falling back to
+	// the process-global store.
+	v := opts.Viper
+	if v == nil {
+		v = viper.GetViper()
+	}
+
 	if opts.ProviderConfig != nil {
-		// Pre-built config supplied by caller (e.g. Kit.New after releasing
-		// viperInitMu). Use it directly — no viper reads needed here.
+		// Pre-built config supplied by caller (e.g. Kit.New after building the
+		// per-instance store). Use it directly — no viper reads needed here.
 		modelConfig = opts.ProviderConfig
 		systemPrompt = modelConfig.SystemPrompt
 	} else {
 		var err error
-		modelConfig, systemPrompt, err = BuildProviderConfig()
+		modelConfig, systemPrompt, err = BuildProviderConfig(v)
 		if err != nil {
 			return nil, err
 		}
@@ -131,18 +186,23 @@ func SetupAgent(ctx context.Context, opts AgentSetupOptions) (*AgentSetupResult,

 	// Resolve debug / no-extensions / max-steps / streaming: prefer explicit
 	// fields (set when ProviderConfig was pre-built) over viper fallback.
-	debugEnabled := opts.Debug || viper.GetBool("debug")
-	noExtensions := opts.NoExtensions || viper.GetBool("no-extensions")
+	debugEnabled := opts.Debug || v.GetBool("debug")
+	noExtensions := opts.NoExtensions || v.GetBool("no-extensions")
 	maxSteps := opts.MaxSteps
 	if maxSteps == 0 {
-		maxSteps = viper.GetInt("max-steps")
+		maxSteps = v.GetInt("max-steps")
 	}
-	streamingEnabled := opts.StreamingEnabled || viper.GetBool("stream")
+	streamingEnabled := opts.StreamingEnabled || v.GetBool("stream")

 	// Create the appropriate debug logger.
 	var debugLogger tools.DebugLogger
 	var bufferedLogger *tools.BufferedDebugLogger
-	if debugEnabled {
+	switch {
+	case opts.DebugLogger != nil:
+		// Caller-supplied logger wins unconditionally. Its IsDebugEnabled()
+		// is the source of truth for whether downstream code emits messages.
+		debugLogger = opts.DebugLogger
+	case debugEnabled:
 		if opts.UseBufferedLogger {
 			bufferedLogger = tools.NewBufferedDebugLogger(true)
 			debugLogger = bufferedLogger
@@ -156,7 +216,7 @@ func SetupAgent(ctx context.Context, opts AgentSetupOptions) (*AgentSetupResult,
 	var extCreationOpts extensionCreationOpts
 	if !noExtensions {
 		var extErr error
-		extRunner, extCreationOpts, extErr = loadExtensions()
+		extRunner, extCreationOpts, extErr = loadExtensions(v)
 		if extErr != nil {
 			fmt.Printf("Warning: Failed to load extensions: %v\n", extErr)
 		}
@@ -183,19 +243,23 @@ func SetupAgent(ctx context.Context, opts AgentSetupOptions) (*AgentSetupResult,
 	}

 	a, err := agent.CreateAgent(ctx, &agent.AgentCreationOptions{
-		ModelConfig:      modelConfig,
-		MCPConfig:        opts.MCPConfig,
-		SystemPrompt:     systemPrompt,
-		MaxSteps:         maxSteps,
-		StreamingEnabled: streamingEnabled,
-		ShowSpinner:      opts.ShowSpinner,
-		Quiet:            opts.Quiet,
-		SpinnerFunc:      opts.SpinnerFunc,
-		DebugLogger:      debugLogger,
-		AuthHandler:      opts.AuthHandler,
-		CoreTools:        opts.CoreTools,
-		ToolWrapper:      toolWrapper,
-		ExtraTools:       extraTools,
+		ModelConfig:       modelConfig,
+		MCPConfig:         opts.MCPConfig,
+		SystemPrompt:      systemPrompt,
+		MaxSteps:          maxSteps,
+		StreamingEnabled:  streamingEnabled,
+		ShowSpinner:       opts.ShowSpinner,
+		Quiet:             opts.Quiet,
+		SpinnerFunc:       opts.SpinnerFunc,
+		DebugLogger:       debugLogger,
+		AuthHandler:       opts.AuthHandler,
+		TokenStoreFactory: opts.TokenStoreFactory,
+		CoreTools:         opts.CoreTools,
+		DisableCoreTools:  opts.DisableCoreTools,
+		ToolWrapper:       toolWrapper,
+		ExtraTools:        extraTools,
+		OnMCPServerLoaded: opts.OnMCPServerLoaded,
+		MCPTaskConfig:     opts.MCPTaskConfig,
 	})
 	if err != nil {
 		return nil, fmt.Errorf("failed to create agent: %w", err)
@@ -216,9 +280,14 @@ type extensionCreationOpts struct {
 }

 // loadExtensions discovers and loads Yaegi extensions, builds the runner,
-// and returns the tool wrapper/extra tools.
-func loadExtensions() (*extensions.Runner, extensionCreationOpts, error) {
-	extraPaths := viper.GetStringSlice("extension")
+// and returns the tool wrapper/extra tools. The supplied store is used to
+// resolve the "extension" config key and is attached to the runner so
+// extension option lookups stay isolated to this Kit instance.
+func loadExtensions(v *viper.Viper) (*extensions.Runner, extensionCreationOpts, error) {
+	if v == nil {
+		v = viper.GetViper()
+	}
+	extraPaths := v.GetStringSlice("extension")
 	loaded, err := extensions.LoadExtensions(extraPaths)
 	if err != nil {
 		return nil, extensionCreationOpts{}, err
@@ -229,12 +298,13 @@ func loadExtensions() (*extensions.Runner, extensionCreationOpts, error) {
 	}

 	runner := extensions.NewRunner(loaded)
+	runner.SetConfigStore(v)

 	wrapper := func(tools []fantasy.AgentTool) []fantasy.AgentTool {
 		return extensions.WrapToolsWithExtensions(tools, runner)
 	}

-	extTools := extensions.ExtensionToolsAsFantasy(runner.RegisteredTools(), runner)
+	extTools := extensions.ExtensionToolsAsLLMTools(runner.RegisteredTools(), runner)

 	return runner, extensionCreationOpts{
 		toolWrapper: wrapper,
@@ -325,12 +325,6 @@ func UnmarshalParts(data []byte) ([]ContentPart, error) {
 // mixed TextPart and ToolCallPart content. Tool-role messages produce
 // ToolResultPart entries.
 func (m *Message) ToLLMMessages() []fantasy.Message {
-	return m.ToFantasyMessages()
-}
-
-// Deprecated: Use ToLLMMessages instead.
-// ToFantasyMessages converts a Message to one or more LLM message values.
-func (m *Message) ToFantasyMessages() []fantasy.Message {
 	switch m.Role {
 	case RoleAssistant:
 		var parts []fantasy.MessagePart
@@ -431,13 +425,6 @@ func (m *Message) ToFantasyMessages() []fantasy.Message {
 // FromLLMMessage converts an LLM message into our Message type,
 // extracting all content parts into the appropriate block types.
 func FromLLMMessage(msg fantasy.Message) Message {
-	return FromFantasyMessage(msg)
-}
-
-// Deprecated: Use FromLLMMessage instead.
-// FromFantasyMessage converts an LLM message into our Message type,
-// extracting all content parts into the appropriate block types.
-func FromFantasyMessage(msg fantasy.Message) Message {
 	m := Message{
 		Role:      MessageRole(msg.Role),
 		Parts:     make([]ContentPart, 0),
@@ -0,0 +1,282 @@
+package models
+
+import (
+	"context"
+	"io"
+	"net/http"
+	"reflect"
+	"strings"
+	"testing"
+)
+
+// TestNpmToWireProtocol pins the npm package → wire protocol table used by the
+// auto-router. Each expected key must be present: a missing entry is reported
+// rather than read as the zero wireProtocol. Bundles needing bespoke auth or
+// URL templating (azure, bedrock, openrouter, google-vertex*, @ai-sdk/gateway)
+// are intentionally absent — CreateProvider has native cases for them.
+func TestNpmToWireProtocol(t *testing.T) {
+	want := map[string]wireProtocol{
+		"@ai-sdk/openai":            wireOpenAI,
+		"@ai-sdk/openai-compatible": wireOpenAI,
+		"@ai-sdk/anthropic":         wireAnthropic,
+		"@ai-sdk/google":            wireGoogle,
+
+		// Thin OpenAI-compatible wrappers — routed via openaicompat using
+		// the SDK's hard-coded default base URL (sdkDefaultBaseURL).
+		"@ai-sdk/groq":                  wireOpenAI,
+		"@ai-sdk/cerebras":              wireOpenAI,
+		"@ai-sdk/perplexity":            wireOpenAI,
+		"@ai-sdk/togetherai":            wireOpenAI,
+		"@ai-sdk/xai":                   wireOpenAI,
+		"@ai-sdk/deepinfra":             wireOpenAI,
+		"@ai-sdk/mistral":               wireOpenAI,
+		"@ai-sdk/cohere":                wireOpenAI,
+		"@ai-sdk/vercel":                wireOpenAI,
+		"@aihubmix/ai-sdk-provider":     wireOpenAI,
+		"venice-ai-sdk-provider":        wireOpenAI,
+		"merge-gateway-ai-sdk-provider": wireOpenAI,
+	}
+	for npm, wire := range want {
+		if got, ok := npmToWireProtocol[npm]; !ok || got != wire {
+			t.Errorf("npmToWireProtocol[%q] = %d (present: %t), want %d", npm, got, ok, wire)
+		}
+	}
+
+	// Bundle packages must NOT be in the table — they need bespoke auth or
+	// URL templating that the auto-router cannot satisfy.
+	for _, npm := range []string{
+		"@ai-sdk/google-vertex",
+		"@ai-sdk/google-vertex/anthropic",
+		"@ai-sdk/amazon-bedrock",
+		"@ai-sdk/azure",
+		"@openrouter/ai-sdk-provider",
+		"@ai-sdk/gateway",
+	} {
+		if _, ok := npmToWireProtocol[npm]; ok {
+			t.Errorf("npmToWireProtocol unexpectedly contains bundle package %q", npm)
+		}
+	}
+}
+
+// newTestRegistry returns a registry holding exactly one proxy-style provider,
+// "testproxy", configured with the given API URL and default npm package. That
+// provider exposes a single model whose ProviderNPM is modelNPMOverride.
+func newTestRegistry(api, defaultNPM, modelID, modelNPMOverride string) *ModelsRegistry {
+	// The only model the provider exposes; its name mirrors its ID.
+	model := ModelInfo{
+		ID:          modelID,
+		Name:        modelID,
+		ProviderNPM: modelNPMOverride,
+	}
+	// The only provider in the registry, keyed by its own ID below.
+	provider := ProviderInfo{
+		ID:     "testproxy",
+		Name:   "Test Proxy",
+		Env:    []string{"TESTPROXY_API_KEY"},
+		NPM:    defaultNPM,
+		API:    api,
+		Models: map[string]ModelInfo{modelID: model},
+	}
+	return &ModelsRegistry{
+		providers: map[string]ProviderInfo{provider.ID: provider},
+	}
+}
+
+// TestAutoRouteProvider_WireRouting checks, per npm package, which concrete
+// model type autoRouteProvider hands back. It is the regression test for
+// issue #41, where an npm resolving to anything other than the
+// openai/anthropic/openaicompat providers (notably @ai-sdk/google) fell into a
+// dead `default` branch and failed with "has no LLM provider mapping".
+func TestAutoRouteProvider_WireRouting(t *testing.T) {
+	cases := []struct {
+		name        string
+		modelID     string
+		defaultNPM  string
+		overrideNPM string
+		// wantType is the expected reflect type string of the returned
+		// result.Model value.
+		wantType string
+	}{
+		{
+			name:       "openai-compatible default",
+			modelID:    "test-model",
+			defaultNPM: "@ai-sdk/openai-compatible",
+			wantType:   "openai.languageModel",
+		},
+		{
+			name:        "anthropic override",
+			modelID:     "test-model",
+			defaultNPM:  "@ai-sdk/openai-compatible",
+			overrideNPM: "@ai-sdk/anthropic",
+			wantType:    "anthropic.languageModel",
+		},
+		{
+			name:        "openai (responses) override",
+			modelID:     "gpt-4o",
+			defaultNPM:  "@ai-sdk/openai-compatible",
+			overrideNPM: "@ai-sdk/openai",
+			wantType:    "openai.responsesLanguageModel",
+		},
+		{
+			// The original failure: a gemini-* model overriding the
+			// provider's openai-compatible default with @ai-sdk/google.
+			name:        "google override (issue #41)",
+			modelID:     "gemini-3.5-flash",
+			defaultNPM:  "@ai-sdk/openai-compatible",
+			overrideNPM: "@ai-sdk/google",
+			wantType:    "*google.languageModel",
+		},
+		{
+			// Unmapped npm plus a provider API URL → openai-compatible fallback.
+			name:       "unknown npm with API URL falls back to openai-compat",
+			modelID:    "test-model",
+			defaultNPM: "@ai-sdk/some-future-thing",
+			wantType:   "openai.languageModel",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			registry := newTestRegistry("https://proxy.example/v1", tc.defaultNPM, tc.modelID, tc.overrideNPM)
+			cfg := &ProviderConfig{ProviderAPIKey: "test-key"}
+
+			routed, err := autoRouteProvider(context.Background(), cfg, "testproxy", tc.modelID, registry)
+			// Either failure aborts the subtest before the type is inspected.
+			switch {
+			case err != nil:
+				t.Fatalf("autoRouteProvider returned error: %v", err)
+			case routed == nil || routed.Model == nil:
+				t.Fatalf("autoRouteProvider returned nil model")
+			}
+
+			if got := reflect.TypeOf(routed.Model).String(); got != tc.wantType {
+				t.Errorf("routed to %s, want %s", got, tc.wantType)
+			}
+		})
+	}
+}
+
+// TestAutoRouteProvider_UnknownNpmNoAPI covers a provider whose npm package has
+// no known wire protocol and whose API URL is empty: routing must fail, and
+// the error must name the provider and point the user at --provider-url.
+func TestAutoRouteProvider_UnknownNpmNoAPI(t *testing.T) {
+	registry := newTestRegistry("", "@ai-sdk/unmapped", "test-model", "")
+	cfg := &ProviderConfig{ProviderAPIKey: "test-key"}
+	_, err := autoRouteProvider(context.Background(), cfg, "testproxy", "test-model", registry)
+	if err == nil {
+		t.Fatal("expected error for unknown npm with no API URL, got nil")
+	}
+	msg := err.Error()
+	if !strings.Contains(msg, "cannot auto-route provider testproxy") {
+		t.Errorf("unexpected error message: %v", err)
+	}
+	if !strings.Contains(msg, "--provider-url") {
+		t.Errorf("error should suggest --provider-url, got: %v", err)
+	}
+}
+
+// TestAutoRouteProvider_UnknownProvider expects the "not in database" error.
+func TestAutoRouteProvider_UnknownProvider(t *testing.T) {
+	registry := newTestRegistry("https://proxy.example/v1", "@ai-sdk/openai-compatible", "test-model", "")
+	cfg := &ProviderConfig{ProviderAPIKey: "test-key"}
+
+	_, err := autoRouteProvider(context.Background(), cfg, "does-not-exist", "test-model", registry)
+	switch {
+	case err == nil:
+		t.Fatal("expected error for unknown provider, got nil")
+	case !strings.Contains(err.Error(), "not found in model database"):
+		t.Errorf("unexpected error message: %v", err)
+	}
+}
+
+// TestIsProviderLLMSupported_Google asserts that isProviderLLMSupported accepts
+// a provider whose npm package is @ai-sdk/google.
+func TestIsProviderLLMSupported_Google(t *testing.T) {
+	provider := ProviderInfo{ID: "testproxy", NPM: "@ai-sdk/google"}
+	if supported := isProviderLLMSupported(provider.ID, &provider); !supported {
+		t.Error("expected @ai-sdk/google provider to be LLM-supported")
+	}
+}
+
+// TestVersionedBasePath exercises versionedBasePath on base URLs that end in an
+// API version segment (expected result: the path with any trailing slash
+// dropped) and on URLs without one, including the empty string (expected: "").
+func TestVersionedBasePath(t *testing.T) {
+	cases := []struct {
+		input    string
+		expected string
+	}{
+		{input: "https://opencode.ai/zen/v1", expected: "/zen/v1"},
+		{input: "https://opencode.ai/zen/v1/", expected: "/zen/v1"},
+		{input: "https://example.com/api/v1beta", expected: "/api/v1beta"},
+		{input: "https://example.com/api/v2alpha", expected: "/api/v2alpha"},
+		{input: "https://generativelanguage.googleapis.com", expected: ""},
+		{input: "https://proxy.example/openai", expected: ""},
+		{input: "", expected: ""},
+	}
+	for _, tc := range cases {
+		if got := versionedBasePath(tc.input); got != tc.expected {
+			t.Errorf("versionedBasePath(%q) = %q, want %q", tc.input, got, tc.expected)
+		}
+	}
+}
+
+// recordingRoundTripper is a stub transport that remembers the URL path of the
+// last request it was handed.
+type recordingRoundTripper struct{ gotPath string }
+
+// RoundTrip stores req's path and replies 200 with an empty JSON object body.
+func (r *recordingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
+	resp := &http.Response{StatusCode: 200, Header: make(http.Header)}
+	resp.Body = io.NopCloser(strings.NewReader("{}"))
+	r.gotPath = req.URL.Path
+	return resp, nil
+}
+
+// TestGeminiProxyTransport_StripsInjectedVersion drives geminiProxyTransport
+// with request paths in which a "/v1beta"-style segment directly follows the
+// configured basePath, and checks the path forwarded to the base transport.
+// Paths that do not start with basePath must pass through unchanged (issue #41).
+func TestGeminiProxyTransport_StripsInjectedVersion(t *testing.T) {
+	cases := []struct {
+		name     string
+		basePath string
+		reqPath  string
+		wantPath string
+	}{
+		{
+			name:     "strips doubled v1beta after /zen/v1",
+			basePath: "/zen/v1",
+			reqPath:  "/zen/v1/v1beta/models/gemini-3.5-flash:generateContent",
+			wantPath: "/zen/v1/models/gemini-3.5-flash:generateContent",
+		},
+		{
+			name:     "strips doubled v1beta1 after /zen/v1",
+			basePath: "/zen/v1",
+			reqPath:  "/zen/v1/v1beta1/models/gemini-3.5-flash:generateContent",
+			wantPath: "/zen/v1/models/gemini-3.5-flash:generateContent",
+		},
+		{
+			name:     "leaves non-matching path untouched",
+			basePath: "/zen/v1",
+			reqPath:  "/other/v1beta/models/x:generateContent",
+			wantPath: "/other/v1beta/models/x:generateContent",
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			recorder := &recordingRoundTripper{}
+			transport := &geminiProxyTransport{base: recorder, basePath: tc.basePath}
+			req, err := http.NewRequest(http.MethodPost, "https://host"+tc.reqPath, nil)
+			if err != nil {
+				t.Fatalf("NewRequest: %v", err)
+			}
+			if _, err := transport.RoundTrip(req); err != nil {
+				t.Fatalf("RoundTrip: %v", err)
+			}
+			if got := recorder.gotPath; got != tc.wantPath {
+				t.Errorf("forwarded path = %q, want %q", got, tc.wantPath)
+			}
+		})
+	}
+}
@@ -3,7 +3,6 @@ package models
 import (
 	"crypto/sha256"
 	"encoding/hex"
-	"maps"
 	"os"

 	"charm.land/fantasy"
@@ -69,19 +68,3 @@ func generateCacheKey(systemPrompt, modelID string) string {
 	// Prefix with "kit-" to identify KIT-generated cache keys
 	return "kit-" + hex.EncodeToString(h.Sum(nil))[:24]
 }
-
-// mergeProviderOptions merges multiple ProviderOptions maps.
-// Later maps take precedence over earlier ones.
-func mergeProviderOptions(opts ...fantasy.ProviderOptions) fantasy.ProviderOptions {
-	result := make(fantasy.ProviderOptions)
-
-	for _, opt := range opts {
-		maps.Copy(result, opt)
-	}
-
-	if len(result) == 0 {
-		return nil
-	}
-
-	return result
-}
@@ -3,8 +3,6 @@ package models
 import (
 	"os"
 	"testing"
-
-	"charm.land/fantasy"
 )

 func TestModelInfo_SupportsCaching(t *testing.T) {
@@ -192,57 +190,3 @@ func TestCachingPriorityOverThinking(t *testing.T) {
 		t.Errorf("OpenAI caching should work when thinking is OFF")
 	}
 }
-
-func TestMergeProviderOptions(t *testing.T) {
-	opts1 := fantasy.ProviderOptions{
-		"provider1": &testProviderData{value: "value1"},
-	}
-	opts2 := fantasy.ProviderOptions{
-		"provider2": &testProviderData{value: "value2"},
-	}
-
-	merged := mergeProviderOptions(opts1, opts2)
-
-	if len(merged) != 2 {
-		t.Errorf("mergeProviderOptions should combine options from multiple maps, got %d items", len(merged))
-	}
-
-	if _, ok := merged["provider1"]; !ok {
-		t.Errorf("merged options should contain 'provider1' key")
-	}
-
-	if _, ok := merged["provider2"]; !ok {
-		t.Errorf("merged options should contain 'provider2' key")
-	}
-
-	// Later options should override earlier ones
-	opts3 := fantasy.ProviderOptions{
-		"provider1": &testProviderData{value: "overridden"},
-	}
-	merged2 := mergeProviderOptions(opts1, opts3)
-
-	if data, ok := merged2["provider1"].(*testProviderData); ok {
-		if data.value != "overridden" {
-			t.Errorf("later options should override earlier ones, got %q", data.value)
-		}
-	}
-
-	if mergeProviderOptions() != nil {
-		t.Errorf("mergeProviderOptions with no args should return nil")
-	}
-}
-
-// testProviderData is a simple implementation of ProviderOptionsData for testing
-type testProviderData struct {
-	value string
-}
-
-func (t *testProviderData) Options() {}
-
-func (t *testProviderData) MarshalJSON() ([]byte, error) {
-	return []byte(`"` + t.value + `"`), nil
-}
-
-func (t *testProviderData) UnmarshalJSON(data []byte) error {
-	return nil
-}
@@ -0,0 +1,84 @@
+package models
+
+import (
+	"net/http"
+	"testing"
+	"time"
+)
+
+// TestCopilotProviderAliasUsesCatalog checks that the "copilot" provider name
+// yields models, a gpt-5.5 lookup, and provider info from the registry.
+func TestCopilotProviderAliasUsesCatalog(t *testing.T) {
+	registry := NewModelsRegistry()
+
+	catalog, err := registry.GetModelsForProvider("copilot")
+	switch {
+	case err != nil:
+		t.Fatalf("GetModelsForProvider(copilot) failed: %v", err)
+	case len(catalog) == 0:
+		t.Fatal("expected copilot alias to return github-copilot catalog models")
+	case registry.LookupModel("copilot", "gpt-5.5") == nil:
+		t.Fatal("expected copilot/gpt-5.5 to resolve through github-copilot catalog")
+	case registry.GetProviderInfo("copilot") == nil:
+		t.Fatal("expected copilot alias to return github-copilot provider info")
+	}
+}
+
+// TestCopilotRejectsNonGPTModels expects CreateProvider to fail for copilot/claude-*.
+func TestCopilotRejectsNonGPTModels(t *testing.T) {
+	if _, err := CreateProvider(t.Context(), &ProviderConfig{ModelString: "copilot/claude-sonnet-4.6"}); err == nil {
+		t.Fatal("expected non-GPT Copilot model to be rejected")
+	}
+}
+
+// TestCopilotHTTPClientCachesToken checks cachedToken returns an unexpired seed token.
+func TestCopilotHTTPClientCachesToken(t *testing.T) {
+	expiry := time.Now().Add(time.Hour).Unix()
+	transport, ok := createCopilotHTTPClient("cached-token", expiry, false).Transport.(*copilotTransport)
+	if !ok {
+		t.Fatal("expected *copilotTransport")
+	}
+
+	if token := transport.cachedToken(t.Context()); token != "cached-token" {
+		t.Fatalf("expected cached token, got %q", token)
+	}
+}
+
+// TestCopilotTransportHeaders checks the headers copilotTransport forwards to its base.
+func TestCopilotTransportHeaders(t *testing.T) {
+	wantHeaders := []struct{ name, value string }{
+		{"Authorization", "Bearer cached-token"},
+		{"Copilot-Integration-Id", copilotIntegrationID},
+		{"Editor-Version", copilotEditorVersion},
+		{"User-Agent", copilotUserAgent},
+	}
+	checkHeaders := roundTripFunc(func(req *http.Request) (*http.Response, error) {
+		for _, h := range wantHeaders {
+			if got := req.Header.Get(h.name); got != h.value {
+				t.Fatalf("unexpected %s header: %q", h.name, got)
+			}
+		}
+		return &http.Response{StatusCode: http.StatusOK, Body: http.NoBody}, nil
+	})
+	transport := &copilotTransport{
+		base:      checkHeaders,
+		token:     "cached-token",
+		expiresAt: time.Now().Add(time.Hour).Unix(),
+	}
+
+	req, err := http.NewRequest(http.MethodGet, "https://example.com", nil)
+	if err != nil {
+		t.Fatalf("NewRequest failed: %v", err)
+	}
+	resp, err := transport.RoundTrip(req)
+	if err != nil {
+		t.Fatalf("RoundTrip failed: %v", err)
+	}
+	_ = resp.Body.Close()
+}
+
+// roundTripFunc adapts a plain function to the http.RoundTripper interface.
+type roundTripFunc func(*http.Request) (*http.Response, error)
+
+// RoundTrip implements http.RoundTripper by invoking the function itself.
+func (fn roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) { return fn(r) }
@@ -2,20 +2,32 @@ package models

 import (
 	"log"
+	"os"
+	"strings"

 	"github.com/spf13/viper"
 )

 // loadCustomModelsFromConfig loads custom model definitions from the config file
 // and returns them as a map of model ID -> ModelInfo. Returns nil if no custom
-// models are configured.
+// models are configured. Reads from the process-global viper store (the model
+// registry is a process-global singleton).
 func loadCustomModelsFromConfig() map[string]ModelInfo {
-	if !viper.IsSet("customModels") {
+	return loadCustomModelsFrom(viper.GetViper())
+}
+
+// loadCustomModelsFrom loads custom model definitions from the supplied store.
+// When v is nil the process-global store is used.
+func loadCustomModelsFrom(v *viper.Viper) map[string]ModelInfo {
+	if v == nil {
+		v = viper.GetViper()
+	}
+	if !v.IsSet("customModels") {
 		return nil
 	}

 	var customModels map[string]CustomModelConfig
-	if err := viper.UnmarshalKey("customModels", &customModels); err != nil {
+	if err := v.UnmarshalKey("customModels", &customModels); err != nil {
 		log.Printf("Warning: Failed to parse customModels: %v", err)
 		return nil
 	}
@@ -31,14 +43,15 @@ func loadCustomModelsFromConfig() map[string]ModelInfo {

 // modelConfigToModelInfo converts a CustomModelConfig to a ModelInfo.
 func modelConfigToModelInfo(modelID string, cfg CustomModelConfig) ModelInfo {
-	return ModelInfo{
-		ID:          modelID,
-		Name:        cfg.Name,
-		Attachment:  cfg.Attachment,
-		Reasoning:   cfg.Reasoning,
-		Temperature: cfg.Temperature,
-		BaseURL:     cfg.BaseURL,
-		APIKey:      cfg.APIKey,
+	info := ModelInfo{
+		ID:           modelID,
+		Name:         cfg.Name,
+		Attachment:   cfg.Attachment,
+		Reasoning:    cfg.Reasoning,
+		Temperature:  cfg.Temperature,
+		BaseURL:      cfg.BaseURL,
+		APIKey:       cfg.APIKey,
+		APIModelName: cfg.APIModelName,
 		Cost: Cost{
 			Input:  cfg.Cost.Input,
 			Output: cfg.Cost.Output,
@@ -48,21 +61,259 @@ func modelConfigToModelInfo(modelID string, cfg CustomModelConfig) ModelInfo {
 			Output:  cfg.Limit.Output,
 		},
 	}
+
+	// Convert custom model generation params if any are set.
+	if p := convertGenerationParams(cfg.Params); p != nil {
+		info.Params = p
+	}
+
+	return info
+}
+
+// LoadModelSettingsFrom loads per-model generation parameter overrides from the
+// "modelSettings" key of the supplied per-instance store. When v is nil the
+// process-global store is used. Keys of the returned map are the keys found
+// under "modelSettings"; ApplyModelSettings looks them up with
+// "provider/model" strings.
+//
+// Returns nil when the key is not set, and also when it cannot be
+// unmarshalled (a warning is logged in that case). Entries that set no
+// parameter at all are omitted, so the result may be a non-nil empty map.
+func LoadModelSettingsFrom(v *viper.Viper) map[string]*GenerationParams {
+	if v == nil {
+		v = viper.GetViper()
+	}
+	if !v.IsSet("modelSettings") {
+		return nil
+	}
+
+	var settings map[string]GenerationParamsConfig
+	if err := v.UnmarshalKey("modelSettings", &settings); err != nil {
+		log.Printf("Warning: Failed to parse modelSettings: %v", err)
+		return nil
+	}
+
+	result := make(map[string]*GenerationParams, len(settings))
+	for modelKey, cfg := range settings {
+		// convertGenerationParams returns nil for an entry with nothing set;
+		// such entries are left out rather than stored as nil values.
+		if p := convertGenerationParams(cfg); p != nil {
+			result[modelKey] = p
+		}
+	}
+
+	return result
+}
+
+// convertGenerationParams converts a GenerationParamsConfig to a GenerationParams.
+// Returns nil if no parameters are set.
+//
+// A field counts as set when its pointer is non-nil, its slice is non-empty,
+// or its string is non-empty. Pointer and slice values are assigned as-is
+// (not cloned), so the result shares them with cfg. ThinkingLevel is the only
+// field that is transformed: it is passed through ParseThinkingLevel.
+func convertGenerationParams(cfg GenerationParamsConfig) *GenerationParams {
+	p := &GenerationParams{}
+	// Tracks whether at least one field was copied.
+	// NOTE(review): this local shadows the predeclared identifier `any` for
+	// the rest of the function; consider renaming it (e.g. anySet).
+	any := false
+
+	if cfg.MaxTokens != nil {
+		p.MaxTokens = cfg.MaxTokens
+		any = true
+	}
+	if cfg.Temperature != nil {
+		p.Temperature = cfg.Temperature
+		any = true
+	}
+	if cfg.TopP != nil {
+		p.TopP = cfg.TopP
+		any = true
+	}
+	if cfg.TopK != nil {
+		p.TopK = cfg.TopK
+		any = true
+	}
+	if cfg.FrequencyPenalty != nil {
+		p.FrequencyPenalty = cfg.FrequencyPenalty
+		any = true
+	}
+	if cfg.PresencePenalty != nil {
+		p.PresencePenalty = cfg.PresencePenalty
+		any = true
+	}
+	if len(cfg.StopSequences) > 0 {
+		p.StopSequences = cfg.StopSequences
+		any = true
+	}
+	if cfg.ThinkingLevel != "" {
+		p.ThinkingLevel = ParseThinkingLevel(cfg.ThinkingLevel)
+		any = true
+	}
+	if cfg.SystemPrompt != "" {
+		p.SystemPrompt = cfg.SystemPrompt
+		any = true
+	}
+
+	if !any {
+		return nil
+	}
+	return p
+}
+
+// ApplyModelSettings merges per-model generation parameter defaults from the
+// registry into a ProviderConfig. Model-level params are only applied for
+// fields whose config key isExplicitlySet does not report as set in the
+// store (SystemPrompt is the exception: it is applied only when
+// config.SystemPrompt is still empty).
+//
+// The lookup order is:
+//  1. modelSettings["provider/model"] from config (highest model-level priority)
+//  2. ModelInfo.Params from custom model definitions
+//
+// The two sources are not merged field by field: when a modelSettings entry
+// exists for the model, ModelInfo.Params is ignored entirely.
+//
+// Both are overridden by explicit CLI flags / global config values.
+//
+// config must be non-nil; modelInfo may be nil. If config.ModelString cannot
+// be parsed the function returns without changing config.
+func ApplyModelSettings(config *ProviderConfig, modelInfo *ModelInfo) {
+	provider, modelName, err := ParseModelString(config.ModelString)
+	if err != nil {
+		return
+	}
+
+	// Resolve the config store: prefer the per-instance store carried on the
+	// ProviderConfig (set by BuildProviderConfig / Kit.New). A nil store is
+	// passed through unchanged; LoadModelSettingsFrom and isExplicitlySet
+	// substitute the process-global store themselves in that case.
+	store := config.ConfigStore
+
+	// Collect model-level params: modelSettings override > custom model params.
+	// modelSettings takes priority because it's the more specific/intentional config.
+	var params *GenerationParams
+
+	// First check modelSettings from config.
+	if settings := LoadModelSettingsFrom(store); settings != nil {
+		modelKey := provider + "/" + modelName
+		if p, ok := settings[modelKey]; ok {
+			params = p
+		}
+	}
+
+	// Fall back to ModelInfo.Params (from custom model definitions).
+	if params == nil && modelInfo != nil && modelInfo.Params != nil {
+		params = modelInfo.Params
+	}
+
+	if params == nil {
+		return
+	}
+
+	// Apply each parameter only when its hyphenated config key is not
+	// reported as set by isExplicitlySet, which is a plain IsSet on the
+	// store: a value from any source the store treats as set (see the notes
+	// on isExplicitlySet) blocks the model-level value.
+
+	if params.MaxTokens != nil && !isExplicitlySet(store, "max-tokens") {
+		config.MaxTokens = *params.MaxTokens
+	}
+	if params.Temperature != nil && !isExplicitlySet(store, "temperature") {
+		config.Temperature = params.Temperature
+	}
+	if params.TopP != nil && !isExplicitlySet(store, "top-p") {
+		config.TopP = params.TopP
+	}
+	if params.TopK != nil && !isExplicitlySet(store, "top-k") {
+		config.TopK = params.TopK
+	}
+	if params.FrequencyPenalty != nil && !isExplicitlySet(store, "frequency-penalty") {
+		config.FrequencyPenalty = params.FrequencyPenalty
+	}
+	if params.PresencePenalty != nil && !isExplicitlySet(store, "presence-penalty") {
+		config.PresencePenalty = params.PresencePenalty
+	}
+	if len(params.StopSequences) > 0 && !isExplicitlySet(store, "stop-sequences") {
+		config.StopSequences = params.StopSequences
+	}
+	if params.ThinkingLevel != "" && !isExplicitlySet(store, "thinking-level") {
+		config.ThinkingLevel = params.ThinkingLevel
+	}
+	if params.SystemPrompt != "" && config.SystemPrompt == "" {
+		// Resolve file paths: if the value points to an existing file, read it.
+		// We check config.SystemPrompt == "" rather than isExplicitlySet because
+		// viper.BindPFlag causes IsSet to return true even for unset flags.
+		config.SystemPrompt = LoadSystemPromptValue(params.SystemPrompt)
+	}
+}
+
+// LoadSystemPromptValue resolves a system prompt value that may be either
+// inline text or a file path. If the value is a path to an existing
+// non-directory file, its contents are read and returned with surrounding
+// whitespace trimmed. Otherwise the string is returned as-is (untrimmed).
+// If the file exists but cannot be read, a warning is logged and the input
+// string itself is returned. An empty input yields an empty result.
+// This mirrors config.LoadSystemPrompt but lives in the models package to
+// avoid circular dependencies.
+func LoadSystemPromptValue(input string) string {
+	if input == "" {
+		return ""
+	}
+	// Any Stat error (not just "does not exist") makes the value count as
+	// inline text.
+	if info, err := os.Stat(input); err == nil && !info.IsDir() {
+		content, err := os.ReadFile(input)
+		if err != nil {
+			log.Printf("Warning: failed to read system prompt file %q: %v", input, err)
+			return input
+		}
+		return strings.TrimSpace(string(content))
+	}
+	return input
+}
+
+// isExplicitlySet reports whether key is set in the config store, as decided
+// by Viper.IsSet. ApplyModelSettings uses it so that model-level defaults do
+// not override values supplied via CLI flag, environment variable, or the
+// global section of the config file.
+//
+// The check runs against the supplied per-instance store when non-nil,
+// otherwise the process-global store. This keeps the "explicit vs unset"
+// precedence contract per-Kit-instance once a store is threaded through.
+func isExplicitlySet(v *viper.Viper, key string) bool {
+	if v == nil {
+		v = viper.GetViper()
+	}
+	// viper.IsSet returns true if the key has been set in any of the
+	// data stores (flag, env, config file, default). For generation
+	// params, the global config keys use hyphenated names (e.g.
+	// "max-tokens", "top-p").
+	//
+	// Since viper merges all sources, IsSet returns true even for config
+	// file values. This means global config file values (e.g.
+	// temperature: 0.7 at the top level) will correctly take precedence
+	// over model-level defaults, which is the desired behavior.
+	//
+	// NOTE(review): no extra filtering is done here, so a key that only has
+	// a registered default also counts as "explicitly set" and blocks the
+	// model-level value — confirm that is intended.
+	return v.IsSet(key)
+}
+
+// GenerationParams holds per-model generation parameter defaults.
+// These are stored on ModelInfo and applied during provider creation.
+// Nil pointer fields mean "no model-level default" — the global config
+// or CLI flag value (if any) will be used instead. For the non-pointer
+// fields the same meaning is carried by an empty StopSequences slice, an
+// empty ThinkingLevel, or an empty SystemPrompt.
+type GenerationParams struct {
+	MaxTokens        *int
+	Temperature      *float32
+	TopP             *float32
+	TopK             *int32
+	FrequencyPenalty *float32
+	PresencePenalty  *float32
+	StopSequences    []string
+	ThinkingLevel    ThinkingLevel
+	SystemPrompt     string // Per-model system prompt (inline text or file path)
 }

 // CustomModelConfig defines a custom model configuration loaded from the config file.
 // This is a duplicate here to avoid circular dependencies with internal/config.
 type CustomModelConfig struct {
-	Name        string      `json:"name" yaml:"name"`
-	BaseURL     string      `json:"baseUrl,omitempty" yaml:"baseUrl,omitempty"`
-	APIKey      string      `json:"apiKey,omitempty" yaml:"apiKey,omitempty"`
-	Family      string      `json:"family,omitempty" yaml:"family,omitempty"`
-	Attachment  bool        `json:"attachment,omitempty" yaml:"attachment,omitempty"`
-	Reasoning   bool        `json:"reasoning,omitempty" yaml:"reasoning,omitempty"`
-	Temperature bool        `json:"temperature,omitempty" yaml:"temperature,omitempty"`
-	Knowledge   string      `json:"knowledge,omitempty" yaml:"knowledge,omitempty"`
-	Cost        CostConfig  `json:"cost" yaml:"cost"`
-	Limit       LimitConfig `json:"limit" yaml:"limit"`
+	Name         string                 `json:"name" yaml:"name"`
+	BaseURL      string                 `json:"baseUrl,omitempty" yaml:"baseUrl,omitempty"`
+	APIKey       string                 `json:"apiKey,omitempty" yaml:"apiKey,omitempty"`
+	APIModelName string                 `json:"apiModelName,omitempty" yaml:"apiModelName,omitempty"`
+	Family       string                 `json:"family,omitempty" yaml:"family,omitempty"`
+	Attachment   bool                   `json:"attachment,omitempty" yaml:"attachment,omitempty"`
+	Reasoning    bool                   `json:"reasoning,omitempty" yaml:"reasoning,omitempty"`
+	Temperature  bool                   `json:"temperature,omitempty" yaml:"temperature,omitempty"`
+	Knowledge    string                 `json:"knowledge,omitempty" yaml:"knowledge,omitempty"`
+	Cost         CostConfig             `json:"cost" yaml:"cost"`
+	Limit        LimitConfig            `json:"limit" yaml:"limit"`
+	Params       GenerationParamsConfig `json:"params,omitzero" yaml:"params,omitempty"`
+}
+
+// GenerationParamsConfig is the JSON/YAML-serializable form of generation
+// parameter defaults. Used in both customModels[].params and modelSettings[].
+// A nil pointer, an empty slice, or an empty string means the parameter is
+// unset; convertGenerationParams skips such fields.
+type GenerationParamsConfig struct {
+	MaxTokens        *int     `json:"maxTokens,omitempty" yaml:"maxTokens,omitempty"`
+	Temperature      *float32 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
+	TopP             *float32 `json:"topP,omitempty" yaml:"topP,omitempty"`
+	TopK             *int32   `json:"topK,omitempty" yaml:"topK,omitempty"`
+	FrequencyPenalty *float32 `json:"frequencyPenalty,omitempty" yaml:"frequencyPenalty,omitempty"`
+	PresencePenalty  *float32 `json:"presencePenalty,omitempty" yaml:"presencePenalty,omitempty"`
+	StopSequences    []string `json:"stopSequences,omitempty" yaml:"stopSequences,omitempty"`
+	ThinkingLevel    string   `json:"thinkingLevel,omitempty" yaml:"thinkingLevel,omitempty"`
+	SystemPrompt     string   `json:"systemPrompt,omitempty" yaml:"systemPrompt,omitempty"`
 }

 // CostConfig defines the pricing for a custom model.
@@ -0,0 +1,422 @@
+package models
+
+import (
+	"os"
+	"testing"
+
+	"github.com/spf13/viper"
+)
+
+func TestConvertGenerationParams(t *testing.T) {
+	t.Run("empty config returns nil", func(t *testing.T) {
+		cfg := GenerationParamsConfig{}
+		p := convertGenerationParams(cfg)
+		if p != nil {
+			t.Errorf("expected nil, got %+v", p)
+		}
+	})
+
+	t.Run("temperature only", func(t *testing.T) {
+		temp := float32(0.7)
+		cfg := GenerationParamsConfig{Temperature: &temp}
+		p := convertGenerationParams(cfg)
+		if p == nil {
+			t.Fatal("expected non-nil")
+		}
+		if p.Temperature == nil || *p.Temperature != 0.7 {
+			t.Errorf("expected temperature 0.7, got %v", p.Temperature)
+		}
+		if p.TopP != nil {
+			t.Errorf("expected nil TopP, got %v", p.TopP)
+		}
+	})
+
+	t.Run("all params set", func(t *testing.T) {
+		maxTokens := 8192
+		temp := float32(0.5)
+		topP := float32(0.9)
+		topK := int32(50)
+		freqPenalty := float32(0.1)
+		presPenalty := float32(0.2)
+		cfg := GenerationParamsConfig{
+			MaxTokens:        &maxTokens,
+			Temperature:      &temp,
+			TopP:             &topP,
+			TopK:             &topK,
+			FrequencyPenalty: &freqPenalty,
+			PresencePenalty:  &presPenalty,
+			StopSequences:    []string{"STOP"},
+			ThinkingLevel:    "high",
+		}
+		p := convertGenerationParams(cfg)
+		if p == nil {
+			t.Fatal("expected non-nil")
+		}
+		if p.MaxTokens == nil || *p.MaxTokens != 8192 {
+			t.Errorf("expected maxTokens 8192, got %v", p.MaxTokens)
+		}
+		if p.Temperature == nil || *p.Temperature != 0.5 {
+			t.Errorf("expected temperature 0.5, got %v", p.Temperature)
+		}
+		if p.TopP == nil || *p.TopP != 0.9 {
+			t.Errorf("expected topP 0.9, got %v", p.TopP)
+		}
+		if p.TopK == nil || *p.TopK != 50 {
+			t.Errorf("expected topK 50, got %v", p.TopK)
+		}
+		if p.FrequencyPenalty == nil || *p.FrequencyPenalty != 0.1 {
+			t.Errorf("expected frequencyPenalty 0.1, got %v", p.FrequencyPenalty)
+		}
+		if p.PresencePenalty == nil || *p.PresencePenalty != 0.2 {
+			t.Errorf("expected presencePenalty 0.2, got %v", p.PresencePenalty)
+		}
+		if len(p.StopSequences) != 1 || p.StopSequences[0] != "STOP" {
+			t.Errorf("expected stop sequences [STOP], got %v", p.StopSequences)
+		}
+		if p.ThinkingLevel != ThinkingHigh {
+			t.Errorf("expected thinking level high, got %v", p.ThinkingLevel)
+		}
+	})
+
+	t.Run("thinking level parsing", func(t *testing.T) {
+		cfg := GenerationParamsConfig{ThinkingLevel: "medium"}
+		p := convertGenerationParams(cfg)
+		if p == nil {
+			t.Fatal("expected non-nil")
+		}
+		if p.ThinkingLevel != ThinkingMedium {
+			t.Errorf("expected thinking level medium, got %v", p.ThinkingLevel)
+		}
+	})
+	t.Run("system prompt only", func(t *testing.T) {
+		cfg := GenerationParamsConfig{SystemPrompt: "You are helpful."}
+		p := convertGenerationParams(cfg)
+		if p == nil {
+			t.Fatal("expected non-nil")
+		}
+		if p.SystemPrompt != "You are helpful." {
+			t.Errorf("expected system prompt, got %q", p.SystemPrompt)
+		}
+	})
+}
+
+func TestModelConfigToModelInfoWithParams(t *testing.T) {
+	temp := float32(0.8)
+	topP := float32(0.95)
+	cfg := CustomModelConfig{
+		Name:        "Test Model",
+		BaseURL:     "http://localhost:8080/v1",
+		Temperature: true,
+		Params: GenerationParamsConfig{
+			Temperature: &temp,
+			TopP:        &topP,
+		},
+	}
+
+	info := modelConfigToModelInfo("test-model", cfg)
+
+	if info.Params == nil {
+		t.Fatal("expected non-nil Params")
+	}
+	if info.Params.Temperature == nil || *info.Params.Temperature != 0.8 {
+		t.Errorf("expected temperature 0.8, got %v", info.Params.Temperature)
+	}
+	if info.Params.TopP == nil || *info.Params.TopP != 0.95 {
+		t.Errorf("expected topP 0.95, got %v", info.Params.TopP)
+	}
+}
+
+func TestModelConfigToModelInfoWithoutParams(t *testing.T) {
+	cfg := CustomModelConfig{
+		Name:    "Test Model",
+		BaseURL: "http://localhost:8080/v1",
+	}
+
+	info := modelConfigToModelInfo("test-model", cfg)
+
+	if info.Params != nil {
+		t.Errorf("expected nil Params, got %+v", info.Params)
+	}
+}
+
+func TestApplyModelSettings(t *testing.T) {
+	// Save and restore viper state.
+	originalViper := viper.AllSettings()
+	defer func() {
+		viper.Reset()
+		for k, v := range originalViper {
+			viper.Set(k, v)
+		}
+	}()
+
+	t.Run("applies model params when not explicitly set", func(t *testing.T) {
+		viper.Reset()
+
+		temp := float32(0.8)
+		topK := int32(50)
+		maxTokens := 4096
+		modelInfo := &ModelInfo{
+			ID: "test-model",
+			Params: &GenerationParams{
+				Temperature: &temp,
+				TopK:        &topK,
+				MaxTokens:   &maxTokens,
+			},
+		}
+
+		config := &ProviderConfig{
+			ModelString: "custom/test-model",
+		}
+
+		ApplyModelSettings(config, modelInfo)
+
+		if config.Temperature == nil || *config.Temperature != 0.8 {
+			t.Errorf("expected temperature 0.8, got %v", config.Temperature)
+		}
+		if config.TopK == nil || *config.TopK != 50 {
+			t.Errorf("expected topK 50, got %v", config.TopK)
+		}
+		if config.MaxTokens != 4096 {
+			t.Errorf("expected maxTokens 4096, got %d", config.MaxTokens)
+		}
+	})
+
+	t.Run("explicit viper values take precedence", func(t *testing.T) {
+		viper.Reset()
+		viper.Set("temperature", 0.3)
+
+		temp := float32(0.8)
+		modelInfo := &ModelInfo{
+			ID: "test-model",
+			Params: &GenerationParams{
+				Temperature: &temp,
+			},
+		}
+
+		explicitTemp := float32(0.3)
+		config := &ProviderConfig{
+			ModelString: "custom/test-model",
+			Temperature: &explicitTemp,
+		}
+
+		ApplyModelSettings(config, modelInfo)
+
+		// Temperature should NOT be overridden because it's explicitly set in viper
+		if config.Temperature == nil || *config.Temperature != 0.3 {
+			t.Errorf("expected temperature 0.3 (explicit), got %v", config.Temperature)
+		}
+	})
+
+	t.Run("nil model info is safe", func(t *testing.T) {
+		viper.Reset()
+
+		config := &ProviderConfig{
+			ModelString: "custom/test-model",
+		}
+
+		// Should not panic
+		ApplyModelSettings(config, nil)
+
+		if config.Temperature != nil {
+			t.Errorf("expected nil temperature, got %v", config.Temperature)
+		}
+	})
+
+	t.Run("model info without params is safe", func(t *testing.T) {
+		viper.Reset()
+
+		modelInfo := &ModelInfo{ID: "test-model"}
+		config := &ProviderConfig{
+			ModelString: "custom/test-model",
+		}
+
+		ApplyModelSettings(config, modelInfo)
+
+		if config.Temperature != nil {
+			t.Errorf("expected nil temperature, got %v", config.Temperature)
+		}
+	})
+
+	t.Run("modelSettings from viper takes priority over ModelInfo.Params", func(t *testing.T) {
+		viper.Reset()
+
+		// Set up modelSettings in viper (simulating config file)
+		viper.Set("modelSettings", map[string]any{
+			"custom/test-model": map[string]any{
+				"temperature": 0.5,
+				"topK":        30,
+			},
+		})
+
+		// ModelInfo has different params
+		temp := float32(0.8)
+		topK := int32(50)
+		modelInfo := &ModelInfo{
+			ID: "test-model",
+			Params: &GenerationParams{
+				Temperature: &temp,
+				TopK:        &topK,
+			},
+		}
+
+		config := &ProviderConfig{
+			ModelString: "custom/test-model",
+		}
+
+		ApplyModelSettings(config, modelInfo)
+
+		// modelSettings should win over ModelInfo.Params
+		if config.Temperature == nil || *config.Temperature != 0.5 {
+			t.Errorf("expected temperature 0.5 (from modelSettings), got %v", config.Temperature)
+		}
+		if config.TopK == nil || *config.TopK != 30 {
+			t.Errorf("expected topK 30 (from modelSettings), got %v", config.TopK)
+		}
+	})
+
+	t.Run("stop sequences applied from model params", func(t *testing.T) {
+		viper.Reset()
+
+		modelInfo := &ModelInfo{
+			ID: "test-model",
+			Params: &GenerationParams{
+				StopSequences: []string{"STOP", "END"},
+			},
+		}
+
+		config := &ProviderConfig{
+			ModelString: "custom/test-model",
+		}
+
+		ApplyModelSettings(config, modelInfo)
+
+		if len(config.StopSequences) != 2 || config.StopSequences[0] != "STOP" {
+			t.Errorf("expected stop sequences [STOP END], got %v", config.StopSequences)
+		}
+	})
+
+	t.Run("thinking level applied from model params", func(t *testing.T) {
+		viper.Reset()
+
+		modelInfo := &ModelInfo{
+			ID: "test-model",
+			Params: &GenerationParams{
+				ThinkingLevel: ThinkingHigh,
+			},
+		}
+
+		config := &ProviderConfig{
+			ModelString: "custom/test-model",
+		}
+
+		ApplyModelSettings(config, modelInfo)
+
+		if config.ThinkingLevel != ThinkingHigh {
+			t.Errorf("expected thinking level high, got %v", config.ThinkingLevel)
+		}
+	})
+
+	t.Run("system prompt applied from model params", func(t *testing.T) {
+		viper.Reset()
+
+		modelInfo := &ModelInfo{
+			ID: "test-model",
+			Params: &GenerationParams{
+				SystemPrompt: "You are a coding assistant.",
+			},
+		}
+
+		config := &ProviderConfig{
+			ModelString: "custom/test-model",
+		}
+
+		ApplyModelSettings(config, modelInfo)
+
+		if config.SystemPrompt != "You are a coding assistant." {
+			t.Errorf("expected system prompt to be set, got %q", config.SystemPrompt)
+		}
+	})
+
+	t.Run("explicit system prompt takes precedence", func(t *testing.T) {
+		viper.Reset()
+
+		modelInfo := &ModelInfo{
+			ID: "test-model",
+			Params: &GenerationParams{
+				SystemPrompt: "Model-specific prompt",
+			},
+		}
+
+		config := &ProviderConfig{
+			ModelString:  "custom/test-model",
+			SystemPrompt: "Global prompt",
+		}
+
+		ApplyModelSettings(config, modelInfo)
+
+		// Global system prompt should NOT be overridden because config
+		// already has a non-empty SystemPrompt.
+		if config.SystemPrompt != "Global prompt" {
+			t.Errorf("expected global prompt preserved, got %q", config.SystemPrompt)
+		}
+	})
+
+	t.Run("system prompt from file path", func(t *testing.T) {
+		viper.Reset()
+
+		// Create a temp file with a system prompt
+		tmpFile, err := os.CreateTemp("", "kit-test-prompt-*.txt")
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer func() { _ = os.Remove(tmpFile.Name()) }()
+		if _, err := tmpFile.WriteString("  Prompt from file  "); err != nil {
+			t.Fatal(err)
+		}
+		_ = tmpFile.Close()
+
+		modelInfo := &ModelInfo{
+			ID: "test-model",
+			Params: &GenerationParams{
+				SystemPrompt: tmpFile.Name(),
+			},
+		}
+
+		config := &ProviderConfig{
+			ModelString: "custom/test-model",
+		}
+
+		ApplyModelSettings(config, modelInfo)
+
+		if config.SystemPrompt != "Prompt from file" {
+			t.Errorf("expected trimmed file content, got %q", config.SystemPrompt)
+		}
+	})
+
+	t.Run("modelSettings system prompt overrides custom model params", func(t *testing.T) {
+		viper.Reset()
+
+		viper.Set("modelSettings", map[string]any{
+			"custom/test-model": map[string]any{
+				"systemPrompt": "From modelSettings",
+			},
+		})
+
+		modelInfo := &ModelInfo{
+			ID: "test-model",
+			Params: &GenerationParams{
+				SystemPrompt: "From custom model",
+			},
+		}
+
+		config := &ProviderConfig{
+			ModelString: "custom/test-model",
+		}
+
+		ApplyModelSettings(config, modelInfo)
+
+		if config.SystemPrompt != "From modelSettings" {
+			t.Errorf("expected modelSettings prompt, got %q", config.SystemPrompt)
+		}
+	})
+}
@@ -48,18 +48,87 @@ type modelsDBLimit struct {
 	Output  int `json:"output"`
 }

-// npmToLLMProvider maps npm package names from models.dev to LLM
-// provider identifiers. Providers not in this map but with an api URL
-// can be auto-routed through openaicompat.
-var npmToLLMProvider = map[string]string{
-	"@ai-sdk/anthropic":               "anthropic",
-	"@ai-sdk/openai":                  "openai",
-	"@ai-sdk/google":                  "google",
-	"@ai-sdk/google-vertex":           "google-vertex",
-	"@ai-sdk/google-vertex/anthropic": "google-vertex-anthropic",
-	"@ai-sdk/amazon-bedrock":          "bedrock",
-	"@ai-sdk/azure":                   "azure",
-	"@openrouter/ai-sdk-provider":     "openrouter",
-	"@ai-sdk/vercel":                  "vercel",
-	"@ai-sdk/openai-compatible":       "openaicompat",
+// wireProtocol identifies which LLM API protocol an npm package speaks.
+// Fantasy implements three native protocols (openai, anthropic, google);
+// everything else in its providers/ tree is a thin wrapper around one of
+// them with a pre-baked default URL or auth scheme.
+type wireProtocol int
+
+const (
+	wireUnknown wireProtocol = iota
+	wireOpenAI
+	wireAnthropic
+	wireGoogle
+)
+
+// npmToWireProtocol maps npm package names from models.dev to the wire
+// protocol they speak. Provider-specific bundles that need bespoke auth or
+// URL templating (azure, bedrock, openrouter, google-vertex, google-vertex-
+// anthropic, and @ai-sdk/gateway which is the Vercel AI Gateway) are
+// intentionally absent — they have native top-level cases in CreateProvider
+// and never reach the auto-router. Providers not in this map but with an
+// api URL are auto-routed through the OpenAI-compatible wire.
+//
+// The thin OpenAI-compatible npm wrappers (groq, cerebras, mistral, …) are
+// listed explicitly so that auto-routing can recover their hard-coded base
+// URL from sdkDefaultBaseURL when the registry entry has no api field.
+var npmToWireProtocol = map[string]wireProtocol{
+	// Native wires.
+	"@ai-sdk/openai":            wireOpenAI,
+	"@ai-sdk/openai-compatible": wireOpenAI,
+	"@ai-sdk/anthropic":         wireAnthropic,
+	"@ai-sdk/google":            wireGoogle,
+
+	// Thin OpenAI-compatible wrappers. Each ships with a hard-coded base URL
+	// in its JS SDK (see sdkDefaultBaseURL) but speaks the plain OpenAI chat
+	// completions wire — so we can route them all through fantasy's
+	// openaicompat provider once we supply the URL.
+	"@ai-sdk/groq":                  wireOpenAI,
+	"@ai-sdk/cerebras":              wireOpenAI,
+	"@ai-sdk/perplexity":            wireOpenAI,
+	"@ai-sdk/togetherai":            wireOpenAI,
+	"@ai-sdk/xai":                   wireOpenAI,
+	"@ai-sdk/deepinfra":             wireOpenAI,
+	"@ai-sdk/mistral":               wireOpenAI,
+	"@ai-sdk/cohere":                wireOpenAI,
+	"@ai-sdk/vercel":                wireOpenAI, // v0 API (api.v0.dev), distinct from @ai-sdk/gateway
+	"@aihubmix/ai-sdk-provider":     wireOpenAI,
+	"venice-ai-sdk-provider":        wireOpenAI,
+	"merge-gateway-ai-sdk-provider": wireOpenAI,
+}
+
+// sdkDefaultBaseURL maps an npm package name to the base URL its JavaScript
+// SDK uses by default. This lets us recover a working endpoint for providers
+// whose models.dev entry omits the `api` field because the JS SDK hard-codes
+// the URL (e.g. groq, cerebras, mistral, x.ai…).
+//
+// OpenAI-compatible and native-wire SDKs are listed, plus two natively
+// handled providers with a fixed URL (@ai-sdk/gateway and
+// @openrouter/ai-sdk-provider) that are included for introspection only.
+// Providers needing bespoke auth or URL templating (bedrock SigV4, azure
+// resource URLs, google-vertex project/location, cloudflare gateway account
+// IDs, gitlab, sap-ai-core) are not listed: they are handled by native
+// CreateProvider cases or surface a targeted error that asks the user to
+// supply --provider-url.
+var sdkDefaultBaseURL = map[string]string{
+	// Native wires.
+	"@ai-sdk/openai":    "https://api.openai.com/v1",
+	"@ai-sdk/anthropic": "https://api.anthropic.com/v1",
+	"@ai-sdk/google":    "https://generativelanguage.googleapis.com/v1beta",
+
+	// Thin OpenAI-compatible wrappers.
+	"@ai-sdk/groq":                  "https://api.groq.com/openai/v1",
+	"@ai-sdk/cerebras":              "https://api.cerebras.ai/v1",
+	"@ai-sdk/perplexity":            "https://api.perplexity.ai",
+	"@ai-sdk/togetherai":            "https://api.together.xyz/v1",
+	"@ai-sdk/xai":                   "https://api.x.ai/v1",
+	"@ai-sdk/deepinfra":             "https://api.deepinfra.com/v1/openai",
+	"@ai-sdk/mistral":               "https://api.mistral.ai/v1",
+	"@ai-sdk/cohere":                "https://api.cohere.com/compatibility/v1",
+	"@ai-sdk/vercel":                "https://api.v0.dev/v1",
+	"@aihubmix/ai-sdk-provider":     "https://aihubmix.com/v1",
+	"venice-ai-sdk-provider":        "https://api.venice.ai/api/v1",
+	"merge-gateway-ai-sdk-provider": "https://api-gateway.merge.dev/v1/ai-sdk",
+
+	// Native handlers — included for ResolveProviderBaseURL introspection
+	// even though CreateProvider routes these via dedicated cases.
+	"@ai-sdk/gateway":             "https://ai-gateway.vercel.sh/v1",
+	"@openrouter/ai-sdk-provider": "https://openrouter.ai/api/v1",
 }
@@ -1,168 +0,0 @@
-package models
-
-import (
-	"context"
-	"sync"
-	"time"
-
-	"charm.land/fantasy"
-)
-
-// ProviderPool manages reusable LLM provider instances to reduce overhead
-// when spawning multiple subagents or making repeated completion calls.
-type ProviderPool struct {
-	mu        sync.RWMutex
-	providers map[string]*pooledProvider
-	ttl       time.Duration
-	closed    bool
-	closeCh   chan struct{}
-}
-
-type pooledProvider struct {
-	model        fantasy.LanguageModel
-	closer       func() error
-	providerOpts fantasy.ProviderOptions
-	created      time.Time
-	lastUsed     time.Time
-	refs         int32
-}
-
-// DefaultPoolTTL is the default time-to-live for idle pooled providers.
-const DefaultPoolTTL = 5 * time.Minute
-
-// globalPool is the singleton provider pool instance.
-var globalPool *ProviderPool
-var poolOnce sync.Once
-
-// GetGlobalPool returns the singleton provider pool instance.
-func GetGlobalPool() *ProviderPool {
-	poolOnce.Do(func() {
-		globalPool = NewProviderPool(DefaultPoolTTL)
-	})
-	return globalPool
-}
-
-// NewProviderPool creates a provider pool with the given TTL for idle providers.
-func NewProviderPool(ttl time.Duration) *ProviderPool {
-	p := &ProviderPool{
-		providers: make(map[string]*pooledProvider),
-		ttl:       ttl,
-		closeCh:   make(chan struct{}),
-	}
-	go p.cleanupLoop()
-	return p
-}
-
-// Get returns a provider for the model string, creating one if needed.
-// The returned release function must be called when the provider is no longer
-// needed. The provider may be reused by subsequent Get calls.
-func (p *ProviderPool) Get(ctx context.Context, modelString string) (fantasy.LanguageModel, fantasy.ProviderOptions, func(), error) {
-	p.mu.Lock()
-
-	// Check if we have an existing provider.
-	if pp, ok := p.providers[modelString]; ok {
-		pp.refs++
-		pp.lastUsed = time.Now()
-		p.mu.Unlock()
-		return pp.model, pp.providerOpts, func() { p.release(modelString) }, nil
-	}
-
-	p.mu.Unlock()
-
-	// Create a new provider outside the lock.
-	config := &ProviderConfig{ModelString: modelString}
-	result, err := CreateProvider(ctx, config)
-	if err != nil {
-		return nil, nil, nil, err
-	}
-
-	p.mu.Lock()
-	defer p.mu.Unlock()
-
-	// Double-check: another goroutine may have created one while we were unlocked.
-	if pp, ok := p.providers[modelString]; ok {
-		// Close the one we just created and use the existing one.
-		if result.Closer != nil {
-			_ = result.Closer.Close()
-		}
-		pp.refs++
-		pp.lastUsed = time.Now()
-		return pp.model, pp.providerOpts, func() { p.release(modelString) }, nil
-	}
-
-	var closerFn func() error
-	if result.Closer != nil {
-		closerFn = result.Closer.Close
-	}
-
-	pp := &pooledProvider{
-		model:        result.Model,
-		closer:       closerFn,
-		providerOpts: result.ProviderOptions,
-		created:      time.Now(),
-		lastUsed:     time.Now(),
-		refs:         1,
-	}
-	p.providers[modelString] = pp
-
-	return pp.model, pp.providerOpts, func() { p.release(modelString) }, nil
-}
-
-func (p *ProviderPool) release(modelString string) {
-	p.mu.Lock()
-	defer p.mu.Unlock()
-
-	if pp, ok := p.providers[modelString]; ok {
-		pp.refs--
-		pp.lastUsed = time.Now()
-	}
-}
-
-func (p *ProviderPool) cleanupLoop() {
-	ticker := time.NewTicker(p.ttl / 2)
-	defer ticker.Stop()
-
-	for {
-		select {
-		case <-p.closeCh:
-			return
-		case <-ticker.C:
-			p.cleanup()
-		}
-	}
-}
-
-func (p *ProviderPool) cleanup() {
-	p.mu.Lock()
-	defer p.mu.Unlock()
-
-	now := time.Now()
-	for key, pp := range p.providers {
-		// Only clean up providers with no active references and past TTL.
-		if pp.refs <= 0 && now.Sub(pp.lastUsed) > p.ttl {
-			if pp.closer != nil {
-				_ = pp.closer()
-			}
-			delete(p.providers, key)
-		}
-	}
-}
-
-// Close shuts down the pool and releases all providers.
-func (p *ProviderPool) Close() {
-	p.mu.Lock()
-	if p.closed {
-		p.mu.Unlock()
-		return
-	}
-	p.closed = true
-	close(p.closeCh)
-
-	for key, pp := range p.providers {
-		if pp.closer != nil {
-			_ = pp.closer()
-		}
-		delete(p.providers, key)
-	}
-	p.mu.Unlock()
-}
--- a/Show More
+++ b/Show More