mirror of
https://github.com/mark3labs/kit.git
synced 2026-06-16 04:26:04 +00:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 70cd214175 | |||
| 35b9360d64 | |||
| 1b8373e133 | |||
| 1a5e4ce7c5 | |||
| 8823977612 | |||
| 24e2ea111c | |||
| 31ea80ec4f | |||
| 99f2680c2e | |||
| da7e05eb87 | |||
| a95714a22d |
+24
-6
@@ -784,6 +784,16 @@ func runNormalMode(ctx context.Context) error {
|
||||
}
|
||||
defer func() { _ = kitInstance.Close() }()
|
||||
|
||||
// Build the "System Prompt loaded" notice shown at startup, paralleling the
|
||||
// per-server "MCP server loaded" notifications so users can confirm that a
|
||||
// configured prompt file was found and applied.
|
||||
var systemPromptLoadedMsg string
|
||||
if kitInstance.HasCustomSystemPrompt() {
|
||||
if src := kitInstance.GetSystemPromptSource(); src != "" {
|
||||
systemPromptLoadedMsg = "System Prompt loaded: " + src
|
||||
}
|
||||
}
|
||||
|
||||
// Extract metadata for display and app options.
|
||||
parsedProvider, modelName, serverNames, toolNames, mcpToolCount, extensionToolCount := CollectAgentMetadata(kitInstance, mcpConfig)
|
||||
|
||||
@@ -801,6 +811,9 @@ func runNormalMode(ctx context.Context) error {
|
||||
}
|
||||
|
||||
DisplayDebugConfig(cli, kitInstance, mcpConfig, parsedProvider)
|
||||
if systemPromptLoadedMsg != "" && cli != nil {
|
||||
cli.DisplayInfo(systemPromptLoadedMsg)
|
||||
}
|
||||
}
|
||||
|
||||
// Load existing messages from resumed/continued sessions.
|
||||
@@ -840,6 +853,9 @@ func runNormalMode(ctx context.Context) error {
|
||||
|
||||
// Buffer for extension messages during startup (printed after startup banner).
|
||||
var startupExtensionMessages []string
|
||||
if systemPromptLoadedMsg != "" {
|
||||
startupExtensionMessages = append(startupExtensionMessages, systemPromptLoadedMsg)
|
||||
}
|
||||
|
||||
// Set up extension context and emit SessionStart.
|
||||
if kitInstance.Extensions().HasExtensions() {
|
||||
@@ -919,9 +935,10 @@ func runNormalMode(ctx context.Context) error {
|
||||
source = "project"
|
||||
}
|
||||
skillItems = append(skillItems, ui.SkillItem{
|
||||
Name: s.Name,
|
||||
Path: s.Path,
|
||||
Source: source,
|
||||
Name: s.Name,
|
||||
Path: s.Path,
|
||||
Source: source,
|
||||
Description: s.Description,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -960,9 +977,10 @@ func runNormalMode(ctx context.Context) error {
|
||||
source = "project"
|
||||
}
|
||||
items = append(items, ui.SkillItem{
|
||||
Name: s.Name,
|
||||
Path: s.Path,
|
||||
Source: source,
|
||||
Name: s.Name,
|
||||
Path: s.Path,
|
||||
Source: source,
|
||||
Description: s.Description,
|
||||
})
|
||||
}
|
||||
return items
|
||||
|
||||
@@ -9,12 +9,19 @@ import (
|
||||
"github.com/mark3labs/kit/internal/tools"
|
||||
)
|
||||
|
||||
// mcpExecutor is the subset of *tools.MCPToolManager that the adapter
|
||||
// actually uses. Extracted as an interface so the adapter is unit-testable
|
||||
// without constructing a full manager + connection pool.
|
||||
type mcpExecutor interface {
|
||||
ExecuteTool(ctx context.Context, prefixedName, inputJSON string) (*tools.MCPToolResult, error)
|
||||
}
|
||||
|
||||
// mcpAgentTool adapts an tools.MCPTool to the fantasy.AgentTool interface.
|
||||
// This keeps the fantasy dependency confined to the agent layer — the tools
|
||||
// package is a pure MCP client library with no LLM framework dependency.
|
||||
type mcpAgentTool struct {
|
||||
tool tools.MCPTool
|
||||
manager *tools.MCPToolManager
|
||||
exec mcpExecutor
|
||||
providerOptions fantasy.ProviderOptions
|
||||
}
|
||||
|
||||
@@ -29,10 +36,26 @@ func (t *mcpAgentTool) Info() fantasy.ToolInfo {
|
||||
}
|
||||
|
||||
// Run executes the MCP tool by delegating to the MCPToolManager.
|
||||
//
|
||||
// MCP-side failures (JSON-RPC protocol errors, transport failures, schema
|
||||
// validation rejections from the server) are surfaced to the model as soft
|
||||
// tool errors rather than escalated to a critical agent error. This matches
|
||||
// the contract that native Kit tools follow via kit.ErrorResult(...) and
|
||||
// lets the model self-correct (e.g. retry with a fixed argument shape) or
|
||||
// give up gracefully rather than aborting the turn mid-run.
|
||||
//
|
||||
// Context cancellation is the one exception: if the caller cancelled the
|
||||
// context the turn was aborted intentionally, so we propagate the ctx error
|
||||
// to let the agent loop unwind cleanly.
|
||||
func (t *mcpAgentTool) Run(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
|
||||
result, err := t.manager.ExecuteTool(ctx, t.tool.Name, call.Input)
|
||||
result, err := t.exec.ExecuteTool(ctx, t.tool.Name, call.Input)
|
||||
if err != nil {
|
||||
return fantasy.ToolResponse{}, fmt.Errorf("mcp tool execution failed: %w", err)
|
||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||
return fantasy.ToolResponse{}, ctxErr
|
||||
}
|
||||
return fantasy.NewTextErrorResponse(
|
||||
fmt.Sprintf("MCP tool %q failed: %s", t.tool.Name, err.Error()),
|
||||
), nil
|
||||
}
|
||||
|
||||
if result.IsError {
|
||||
@@ -57,8 +80,8 @@ func mcpToolsToAgentTools(mcpTools []tools.MCPTool, manager *tools.MCPToolManage
|
||||
agentTools := make([]fantasy.AgentTool, len(mcpTools))
|
||||
for i, t := range mcpTools {
|
||||
agentTools[i] = &mcpAgentTool{
|
||||
tool: t,
|
||||
manager: manager,
|
||||
tool: t,
|
||||
exec: manager,
|
||||
}
|
||||
}
|
||||
return agentTools
|
||||
|
||||
@@ -0,0 +1,158 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"charm.land/fantasy"
|
||||
|
||||
"github.com/mark3labs/kit/internal/tools"
|
||||
)
|
||||
|
||||
// stubExecutor lets each test script the (result, err) pair returned by
|
||||
// ExecuteTool. The adapter holds an mcpExecutor interface, so this is the
|
||||
// only seam the tests need.
|
||||
type stubExecutor struct {
|
||||
result *tools.MCPToolResult
|
||||
err error
|
||||
// called records the last invocation for assertion.
|
||||
called bool
|
||||
name string
|
||||
input string
|
||||
}
|
||||
|
||||
func (s *stubExecutor) ExecuteTool(_ context.Context, prefixedName, inputJSON string) (*tools.MCPToolResult, error) {
|
||||
s.called = true
|
||||
s.name = prefixedName
|
||||
s.input = inputJSON
|
||||
return s.result, s.err
|
||||
}
|
||||
|
||||
func newMCPAgentTool(exec mcpExecutor, name string) *mcpAgentTool {
|
||||
return &mcpAgentTool{
|
||||
tool: tools.MCPTool{Name: name},
|
||||
exec: exec,
|
||||
}
|
||||
}
|
||||
|
||||
// Manager-side Go errors (JSON-RPC protocol errors, transport failures,
|
||||
// schema validation rejections from the MCP server) must be surfaced to
|
||||
// the model as soft tool errors so the agent loop can keep going. Aborting
|
||||
// the turn would discard all prior tool results — see issue #N.
|
||||
func TestMCPAgentTool_RPCErrorBecomesSoftError(t *testing.T) {
|
||||
exec := &stubExecutor{
|
||||
err: errors.New("MCP error -32602: Invalid params: missing field \"task\""),
|
||||
}
|
||||
tool := newMCPAgentTool(exec, "pubmed__search")
|
||||
|
||||
resp, err := tool.Run(context.Background(), fantasy.ToolCall{
|
||||
ID: "call-1",
|
||||
Name: "pubmed__search",
|
||||
Input: `{"query":"foo"}`,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil error (soft), got %v", err)
|
||||
}
|
||||
if !resp.IsError {
|
||||
t.Fatalf("expected IsError=true, got false")
|
||||
}
|
||||
if !strings.Contains(resp.Content, "pubmed__search") {
|
||||
t.Errorf("expected tool name in error content, got %q", resp.Content)
|
||||
}
|
||||
if !strings.Contains(resp.Content, "-32602") {
|
||||
t.Errorf("expected underlying error text in content, got %q", resp.Content)
|
||||
}
|
||||
}
|
||||
|
||||
// Context cancellation is the one error that must remain critical: it
|
||||
// means the caller intentionally aborted, and the agent loop needs to
|
||||
// unwind cleanly rather than burning more steps.
|
||||
func TestMCPAgentTool_CtxCancelStaysCritical(t *testing.T) {
|
||||
exec := &stubExecutor{
|
||||
// Real managers typically return ctx.Err() (or a wrapper) when the
|
||||
// context is cancelled mid-call.
|
||||
err: context.Canceled,
|
||||
}
|
||||
tool := newMCPAgentTool(exec, "slow__tool")
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
resp, err := tool.Run(ctx, fantasy.ToolCall{Name: "slow__tool"})
|
||||
|
||||
if !errors.Is(err, context.Canceled) {
|
||||
t.Fatalf("expected context.Canceled, got %v", err)
|
||||
}
|
||||
if resp.IsError || resp.Content != "" {
|
||||
t.Errorf("expected empty response on critical error, got IsError=%v Content=%q", resp.IsError, resp.Content)
|
||||
}
|
||||
}
|
||||
|
||||
// Deadline-exceeded behaves the same as cancellation: ctx.Err() is
|
||||
// non-nil, so the adapter must propagate the critical error rather than
|
||||
// converting the executor's error into a soft response.
|
||||
func TestMCPAgentTool_CtxDeadlineStaysCritical(t *testing.T) {
|
||||
exec := &stubExecutor{err: context.DeadlineExceeded}
|
||||
tool := newMCPAgentTool(exec, "slow__tool")
|
||||
|
||||
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(-time.Second))
|
||||
defer cancel()
|
||||
|
||||
resp, err := tool.Run(ctx, fantasy.ToolCall{Name: "slow__tool"})
|
||||
if !errors.Is(err, context.DeadlineExceeded) {
|
||||
t.Fatalf("expected context.DeadlineExceeded, got %v", err)
|
||||
}
|
||||
if resp.IsError || resp.Content != "" {
|
||||
t.Errorf("expected empty response on critical error, got IsError=%v Content=%q", resp.IsError, resp.Content)
|
||||
}
|
||||
}
|
||||
|
||||
// Server-side soft errors (CallToolResult{ isError: true }) must continue
|
||||
// to flow through as soft errors — this was the existing behavior and
|
||||
// must not regress.
|
||||
func TestMCPAgentTool_ServerIsErrorRemainsSoftError(t *testing.T) {
|
||||
exec := &stubExecutor{
|
||||
result: &tools.MCPToolResult{
|
||||
IsError: true,
|
||||
Content: "search service is rate limited; try again in 30s",
|
||||
},
|
||||
}
|
||||
tool := newMCPAgentTool(exec, "pubmed__search")
|
||||
|
||||
resp, err := tool.Run(context.Background(), fantasy.ToolCall{Name: "pubmed__search"})
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil error, got %v", err)
|
||||
}
|
||||
if !resp.IsError {
|
||||
t.Fatalf("expected IsError=true, got false")
|
||||
}
|
||||
if resp.Content != "search service is rate limited; try again in 30s" {
|
||||
t.Errorf("expected pass-through content, got %q", resp.Content)
|
||||
}
|
||||
}
|
||||
|
||||
// Happy path: ordinary successful tool result is passed through unchanged.
|
||||
func TestMCPAgentTool_SuccessIsPassthrough(t *testing.T) {
|
||||
exec := &stubExecutor{
|
||||
result: &tools.MCPToolResult{
|
||||
IsError: false,
|
||||
Content: `{"hits":3}`,
|
||||
},
|
||||
}
|
||||
tool := newMCPAgentTool(exec, "pubmed__search")
|
||||
|
||||
resp, err := tool.Run(context.Background(), fantasy.ToolCall{Name: "pubmed__search"})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if resp.IsError {
|
||||
t.Fatalf("expected IsError=false")
|
||||
}
|
||||
if resp.Content != `{"hits":3}` {
|
||||
t.Errorf("expected pass-through content, got %q", resp.Content)
|
||||
}
|
||||
}
|
||||
+91
-10
@@ -78,6 +78,13 @@ type App struct {
|
||||
// (~1 frame) so new updates are always let through once the TUI has had a
|
||||
// chance to process the pending event.
|
||||
widgetUpdatePending atomic.Bool
|
||||
|
||||
// steerDrainFn is the test seam used by releaseBusyAfterCompact to pull
|
||||
// any steer messages that arrived during compaction. In production it is
|
||||
// nil and the helper falls back to a.opts.Kit.DrainSteer(); tests that
|
||||
// need to exercise the steer-drain path without standing up a full
|
||||
// *kit.Kit can set this field directly to inject fake items.
|
||||
steerDrainFn func() []queueItem
|
||||
}
|
||||
|
||||
// New creates a new App with the provided options and pre-loaded messages.
|
||||
@@ -356,6 +363,10 @@ func (a *App) AddContextMessage(text string) {
|
||||
// tea.Program. customInstructions is optional text appended to the summary
|
||||
// prompt (e.g. "Focus on the API design decisions").
|
||||
//
|
||||
// Any prompts queued via Run/RunWithFiles or steering messages injected via
|
||||
// Steer/SteerWithFiles while compaction is running are flushed automatically
|
||||
// once compaction completes (see releaseBusyAfterCompact).
|
||||
//
|
||||
// Satisfies ui.AppController.
|
||||
func (a *App) CompactConversation(customInstructions string) error {
|
||||
a.mu.Lock()
|
||||
@@ -377,11 +388,7 @@ func (a *App) CompactConversation(customInstructions string) error {
|
||||
|
||||
go func() {
|
||||
defer a.wg.Done()
|
||||
defer func() {
|
||||
a.mu.Lock()
|
||||
a.busy = false
|
||||
a.mu.Unlock()
|
||||
}()
|
||||
defer a.releaseBusyAfterCompact()
|
||||
|
||||
// Subscribe to SDK events for streaming compaction summary to the TUI.
|
||||
sendFn := func(msg tea.Msg) {
|
||||
@@ -420,6 +427,9 @@ func (a *App) CompactConversation(customInstructions string) error {
|
||||
// CompactAsync is like CompactConversation but calls onComplete/onError
|
||||
// callbacks instead of sending TUI events. Used by the extension API's
|
||||
// ctx.Compact() which needs callback-based notification.
|
||||
//
|
||||
// Like CompactConversation, any prompts/steer messages received during
|
||||
// compaction are flushed automatically once compaction finishes.
|
||||
func (a *App) CompactAsync(customInstructions string, onComplete func(), onError func(string)) error {
|
||||
a.mu.Lock()
|
||||
if a.closed {
|
||||
@@ -440,11 +450,7 @@ func (a *App) CompactAsync(customInstructions string, onComplete func(), onError
|
||||
|
||||
go func() {
|
||||
defer a.wg.Done()
|
||||
defer func() {
|
||||
a.mu.Lock()
|
||||
a.busy = false
|
||||
a.mu.Unlock()
|
||||
}()
|
||||
defer a.releaseBusyAfterCompact()
|
||||
|
||||
// Subscribe to SDK events for streaming compaction summary to the TUI.
|
||||
sendFn := func(msg tea.Msg) {
|
||||
@@ -489,6 +495,81 @@ func (a *App) CompactAsync(customInstructions string, onComplete func(), onError
|
||||
return nil
|
||||
}
|
||||
|
||||
// releaseBusyAfterCompact is the deferred tail that runs at the end of every
|
||||
// compaction goroutine (success, error, or panic-after-recover paths). It
|
||||
// flips a.busy back to false, but before doing so it checks whether any
|
||||
// prompts piled up while compaction was running:
|
||||
//
|
||||
// - Run/RunWithFiles append to a.queue when a.busy is set.
|
||||
// - Steer/SteerWithFiles deposit messages into the SDK steer channel via
|
||||
// Kit.InjectSteerWithFiles when a.busy is set.
|
||||
//
|
||||
// Without this hand-off the queue would sit idle until the user submits
|
||||
// another prompt — see issue #27. If we find anything pending we keep busy
|
||||
// set, splice the steer messages to the front of the queue, and start a
|
||||
// fresh drainQueue goroutine to deliver them as a single batched turn.
|
||||
func (a *App) releaseBusyAfterCompact() {
|
||||
// Pull steer messages outside the app mutex; DrainSteer takes its own
|
||||
// internal lock and we don't want to nest the two. The test seam
|
||||
// (a.steerDrainFn) takes precedence so unit tests can inject fake
|
||||
// steer items without a real *kit.Kit.
|
||||
var steerItems []queueItem
|
||||
switch {
|
||||
case a.steerDrainFn != nil:
|
||||
steerItems = a.steerDrainFn()
|
||||
case a.opts.Kit != nil:
|
||||
if leftover := a.opts.Kit.DrainSteer(); len(leftover) > 0 {
|
||||
steerItems = make([]queueItem, len(leftover))
|
||||
for i, sm := range leftover {
|
||||
steerItems[i] = queueItem{Prompt: sm.Text, Files: sm.Files}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
a.mu.Lock()
|
||||
// If the app was closed while compaction was running, drop everything
|
||||
// and just clear busy. Run/Steer would have rejected new items already
|
||||
// after Close(), but this guards against in-flight items that slipped
|
||||
// in just before closed was set.
|
||||
if a.closed {
|
||||
a.queue = a.queue[:0]
|
||||
a.busy = false
|
||||
a.mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// Combine steer-channel items (front) with the in-memory queue (back).
|
||||
// Steer messages are placed first so they retain their "act now"
|
||||
// semantics relative to ordinary queued prompts that arrived later.
|
||||
pending := append(steerItems, a.queue...)
|
||||
a.queue = a.queue[:0]
|
||||
|
||||
if len(pending) == 0 {
|
||||
a.busy = false
|
||||
a.mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// Hand off to drainQueue: it will pick up the first item directly and
|
||||
// scoop the rest from a.queue on its first iteration.
|
||||
first := pending[0]
|
||||
if len(pending) > 1 {
|
||||
a.queue = append(a.queue, pending[1:]...)
|
||||
}
|
||||
// Stay busy across the goroutine swap.
|
||||
a.wg.Add(1)
|
||||
a.mu.Unlock()
|
||||
|
||||
// Notify the UI that steer-channel messages were consumed so the
|
||||
// steering badge can clear; ordinary queued prompts will be reflected
|
||||
// by the QueueUpdatedEvent that drainQueue emits as it picks them up.
|
||||
if len(steerItems) > 0 {
|
||||
a.sendEvent(SteerConsumedEvent{})
|
||||
}
|
||||
|
||||
go a.drainQueue(first)
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// Non-interactive execution
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
@@ -763,3 +763,209 @@ func TestFormatMaxTokensTruncatedMessage_NoKit(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// releaseBusyAfterCompact (issue #27)
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
// TestReleaseBusyAfterCompact_flushesQueuedMessages is a regression test for
|
||||
// issue #27: messages queued via Run() while /compact is running used to sit
|
||||
// in a.queue indefinitely until the user typed another prompt. After the fix
|
||||
// the deferred releaseBusyAfterCompact tail picks up any pending items and
|
||||
// dispatches drainQueue automatically.
|
||||
//
|
||||
// We simulate the compaction completion path directly (bypassing the SDK)
|
||||
// by toggling busy=true, populating the queue exactly as Run() would have
|
||||
// during compaction, and then invoking releaseBusyAfterCompact.
|
||||
func TestReleaseBusyAfterCompact_flushesQueuedMessages(t *testing.T) {
|
||||
stub := newStubWithFuncs(
|
||||
func(ctx context.Context) (*kit.TurnResult, error) {
|
||||
return turnResult("compacted then drained"), nil
|
||||
},
|
||||
)
|
||||
app := newTestApp(stub)
|
||||
defer app.Close()
|
||||
|
||||
// Simulate the state at the start of the compaction tail: busy is set
|
||||
// and a couple of prompts have piled up in the queue while we were
|
||||
// summarising. (Run() would have appended them and returned a queue
|
||||
// length > 0 to the caller.)
|
||||
app.mu.Lock()
|
||||
app.busy = true
|
||||
app.queue = append(app.queue,
|
||||
queueItem{Prompt: "queued during compact #1"},
|
||||
queueItem{Prompt: "queued during compact #2"},
|
||||
)
|
||||
app.mu.Unlock()
|
||||
|
||||
// Invoke the deferred tail directly. It should kick off drainQueue.
|
||||
app.releaseBusyAfterCompact()
|
||||
|
||||
// drainQueue runs in a goroutine. Wait for the app to come back to idle.
|
||||
ok := waitForCondition(2*time.Second, func() bool {
|
||||
app.mu.Lock()
|
||||
defer app.mu.Unlock()
|
||||
return !app.busy
|
||||
})
|
||||
if !ok {
|
||||
t.Fatal("app did not become idle after releaseBusyAfterCompact: queue not drained")
|
||||
}
|
||||
|
||||
// Wait for any in-flight goroutine to finish before reading state.
|
||||
app.wg.Wait()
|
||||
|
||||
if got := app.QueueLength(); got != 0 {
|
||||
t.Fatalf("expected empty queue after drain, got %d", got)
|
||||
}
|
||||
if n := stub.callCount(); n == 0 {
|
||||
t.Fatalf("expected stub PromptFunc to fire at least once after compact, got %d calls", n)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReleaseBusyAfterCompact_idleWhenQueueEmpty verifies that with no
|
||||
// pending messages the helper just clears busy and does NOT spawn a
|
||||
// drainQueue goroutine (no spurious agent turn).
|
||||
func TestReleaseBusyAfterCompact_idleWhenQueueEmpty(t *testing.T) {
|
||||
stub := newStub()
|
||||
app := newTestApp(stub)
|
||||
defer app.Close()
|
||||
|
||||
app.mu.Lock()
|
||||
app.busy = true
|
||||
app.mu.Unlock()
|
||||
|
||||
app.releaseBusyAfterCompact()
|
||||
|
||||
app.mu.Lock()
|
||||
busy := app.busy
|
||||
app.mu.Unlock()
|
||||
if busy {
|
||||
t.Fatal("expected busy=false after releaseBusyAfterCompact with empty queue")
|
||||
}
|
||||
|
||||
// Give any rogue goroutine a moment to (incorrectly) call PromptFunc.
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
if n := stub.callCount(); n != 0 {
|
||||
t.Fatalf("expected 0 PromptFunc calls when queue empty, got %d", n)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReleaseBusyAfterCompact_splicesSteerAheadOfQueue exercises the SDK
|
||||
// steer-drain branch of releaseBusyAfterCompact (issue #27 follow-up).
|
||||
//
|
||||
// Production wires a.opts.Kit.DrainSteer() to pull messages that arrived via
|
||||
// Steer/SteerWithFiles during compaction, but Options.Kit is *kit.Kit (a
|
||||
// concrete struct) so unit tests cannot stand up a real instance without a
|
||||
// full LLM backend. The test uses the unexported steerDrainFn seam to inject
|
||||
// fake steer items, then asserts that:
|
||||
//
|
||||
// - Steer items are dispatched ahead of any prompts that piled up in
|
||||
// a.queue (steer retains "act now" priority over ordinary queued
|
||||
// prompts), and
|
||||
// - the helper still hands off to drainQueue so the steer item actually
|
||||
// fires (the previous behaviour left them stranded — see #27).
|
||||
func TestReleaseBusyAfterCompact_splicesSteerAheadOfQueue(t *testing.T) {
|
||||
var pmu sync.Mutex
|
||||
var firstPrompt string
|
||||
stub := newStubWithFuncs(
|
||||
func(ctx context.Context) (*kit.TurnResult, error) {
|
||||
return turnResult("steer dispatched"), nil
|
||||
},
|
||||
)
|
||||
// Wrap PromptFunc so we can capture the prompt text the stub receives
|
||||
// (newStubWithFuncs's fns ignore prompt; we need it to verify ordering).
|
||||
capturingPrompt := func(ctx context.Context, prompt string) (*kit.TurnResult, error) {
|
||||
pmu.Lock()
|
||||
if firstPrompt == "" {
|
||||
firstPrompt = prompt
|
||||
}
|
||||
pmu.Unlock()
|
||||
return stub.fn(ctx, prompt)
|
||||
}
|
||||
app := New(Options{PromptFunc: capturingPrompt}, nil)
|
||||
defer app.Close()
|
||||
|
||||
// Inject fake steer items via the test seam. In production the same
|
||||
// items would have been delivered through Kit.InjectSteerWithFiles
|
||||
// during /compact and pulled by DrainSteer here.
|
||||
app.steerDrainFn = func() []queueItem {
|
||||
return []queueItem{
|
||||
{Prompt: "steer-1"},
|
||||
{Prompt: "steer-2"},
|
||||
}
|
||||
}
|
||||
|
||||
// Simulate the state at the end of compaction: busy is set and a couple
|
||||
// of regular Run() prompts have piled up after the steer messages.
|
||||
app.mu.Lock()
|
||||
app.busy = true
|
||||
app.queue = append(app.queue,
|
||||
queueItem{Prompt: "queued-1"},
|
||||
queueItem{Prompt: "queued-2"},
|
||||
)
|
||||
app.mu.Unlock()
|
||||
|
||||
app.releaseBusyAfterCompact()
|
||||
|
||||
// Wait for the dispatched batch to complete.
|
||||
ok := waitForCondition(2*time.Second, func() bool {
|
||||
app.mu.Lock()
|
||||
defer app.mu.Unlock()
|
||||
return !app.busy
|
||||
})
|
||||
if !ok {
|
||||
t.Fatal("app did not become idle after steer-spliced releaseBusyAfterCompact")
|
||||
}
|
||||
app.wg.Wait()
|
||||
|
||||
// drainQueue picks up `first` directly and batches the rest. With
|
||||
// PromptFunc set, executeBatch invokes us with items[0] only — that
|
||||
// item must be the first steer message, proving steer items were
|
||||
// spliced ahead of the previously queued prompts.
|
||||
pmu.Lock()
|
||||
got := firstPrompt
|
||||
pmu.Unlock()
|
||||
if got != "steer-1" {
|
||||
t.Fatalf("expected first dispatched prompt to be steer item %q (steer items must come before queued prompts), got %q",
|
||||
"steer-1", got)
|
||||
}
|
||||
|
||||
// Queue should be fully drained and PromptFunc must have actually fired.
|
||||
if n := app.QueueLength(); n != 0 {
|
||||
t.Fatalf("expected empty queue after drain, got %d entries", n)
|
||||
}
|
||||
if n := stub.callCount(); n == 0 {
|
||||
t.Fatal("expected stub PromptFunc to fire at least once after splice")
|
||||
}
|
||||
}
|
||||
|
||||
// TestReleaseBusyAfterCompact_dropsQueueWhenClosed verifies that if the app
|
||||
// was closed during compaction the helper discards any pending items rather
|
||||
// than spawning drainQueue against a torn-down App.
|
||||
func TestReleaseBusyAfterCompact_dropsQueueWhenClosed(t *testing.T) {
|
||||
stub := newStub()
|
||||
app := newTestApp(stub)
|
||||
|
||||
app.mu.Lock()
|
||||
app.busy = true
|
||||
app.queue = append(app.queue, queueItem{Prompt: "would have run"})
|
||||
app.closed = true
|
||||
app.mu.Unlock()
|
||||
|
||||
app.releaseBusyAfterCompact()
|
||||
|
||||
app.mu.Lock()
|
||||
busy := app.busy
|
||||
qLen := len(app.queue)
|
||||
app.mu.Unlock()
|
||||
if busy {
|
||||
t.Fatal("expected busy=false even when closed")
|
||||
}
|
||||
if qLen != 0 {
|
||||
t.Fatalf("expected queue cleared on closed app, got %d entries", qLen)
|
||||
}
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
if n := stub.callCount(); n != 0 {
|
||||
t.Fatalf("expected 0 PromptFunc calls on closed app, got %d", n)
|
||||
}
|
||||
}
|
||||
|
||||
+54
-6
@@ -129,9 +129,10 @@ type AppController interface {
|
||||
// SkillItem holds display metadata about a loaded skill for the startup
|
||||
// [Skills] section. Built by the CLI layer from the SDK's []*kit.Skill.
|
||||
type SkillItem struct {
|
||||
Name string // Skill name (e.g. "btca-cli").
|
||||
Path string // Absolute path to the skill file.
|
||||
Source string // "project" or "user" (global).
|
||||
Name string // Skill name (e.g. "btca-cli").
|
||||
Path string // Absolute path to the skill file.
|
||||
Source string // "project" or "user" (global).
|
||||
Description string // Short summary used in autocomplete and help.
|
||||
}
|
||||
|
||||
// MCPPromptInfo describes an MCP prompt for display in the TUI (autocomplete,
|
||||
@@ -912,6 +913,20 @@ func NewAppModel(appCtrl AppController, opts AppModelOptions) *AppModel {
|
||||
}
|
||||
}
|
||||
|
||||
// Merge skills into autocomplete as /skill:<name> commands. Skills accept
|
||||
// optional trailing args, so HasArgs is true — Enter populates the input
|
||||
// with "/skill:name " rather than auto-submitting.
|
||||
if ic, ok := m.input.(*InputComponent); ok && len(opts.SkillItems) > 0 {
|
||||
for _, s := range opts.SkillItems {
|
||||
ic.commands = append(ic.commands, commands.SlashCommand{
|
||||
Name: "/skill:" + s.Name,
|
||||
Description: formatSkillDescription(s),
|
||||
Category: "Skills",
|
||||
HasArgs: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Merge MCP prompts into autocomplete as /<server>:<prompt> commands.
|
||||
if ic, ok := m.input.(*InputComponent); ok && len(opts.MCPPrompts) > 0 {
|
||||
for _, p := range opts.MCPPrompts {
|
||||
@@ -3395,13 +3410,46 @@ func (m *AppModel) refreshPromptTemplates() {
|
||||
}
|
||||
}
|
||||
|
||||
// refreshSkillItems reloads skill items from the provider callback.
|
||||
// Called on ContentReloadEvent.
|
||||
// refreshSkillItems reloads skill items from the provider callback and
|
||||
// updates the autocomplete entries. Called on ContentReloadEvent.
|
||||
func (m *AppModel) refreshSkillItems() {
|
||||
if m.getSkillItems == nil {
|
||||
return
|
||||
}
|
||||
m.skillItems = m.getSkillItems()
|
||||
newItems := m.getSkillItems()
|
||||
m.skillItems = newItems
|
||||
|
||||
if ic, ok := m.input.(*InputComponent); ok {
|
||||
// Remove old Skills commands and add fresh ones.
|
||||
var kept []commands.SlashCommand
|
||||
for _, sc := range ic.commands {
|
||||
if sc.Category != "Skills" {
|
||||
kept = append(kept, sc)
|
||||
}
|
||||
}
|
||||
for _, s := range newItems {
|
||||
kept = append(kept, commands.SlashCommand{
|
||||
Name: "/skill:" + s.Name,
|
||||
Description: formatSkillDescription(s),
|
||||
Category: "Skills",
|
||||
HasArgs: true,
|
||||
})
|
||||
}
|
||||
ic.commands = kept
|
||||
}
|
||||
}
|
||||
|
||||
// formatSkillDescription returns the autocomplete description for a skill,
|
||||
// prefixed with [project] or [user] so users can tell colliding names apart.
|
||||
func formatSkillDescription(s SkillItem) string {
|
||||
prefix := "[user]"
|
||||
if s.Source == "project" {
|
||||
prefix = "[project]"
|
||||
}
|
||||
if s.Description == "" {
|
||||
return prefix
|
||||
}
|
||||
return prefix + " " + s.Description
|
||||
}
|
||||
|
||||
// refreshMCPPrompts reloads MCP prompts from the provider callback and
|
||||
|
||||
+35
-1
@@ -58,6 +58,9 @@ type Kit struct {
|
||||
// When false, per-model system prompts from modelSettings/customModels
|
||||
// can replace the default prompt on model switch.
|
||||
hasCustomSystemPrompt bool
|
||||
// systemPromptSource holds the raw configured value (file path or text)
|
||||
// when hasCustomSystemPrompt is true; empty when the built-in default is in use.
|
||||
systemPromptSource string
|
||||
|
||||
// Hook registries — interception layer (see hooks.go).
|
||||
beforeToolCall *hookRegistry[BeforeToolCallHook, BeforeToolCallResult]
|
||||
@@ -632,6 +635,21 @@ func (m *Kit) SetModel(ctx context.Context, modelString string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// HasCustomSystemPrompt reports whether the user explicitly configured a system
|
||||
// prompt via --system-prompt, a config file entry, or SDK Options.SystemPrompt.
|
||||
// When false, the built-in default (or a per-model override) is in use and can
|
||||
// be replaced transparently on model switch.
|
||||
func (m *Kit) HasCustomSystemPrompt() bool {
|
||||
return m.hasCustomSystemPrompt
|
||||
}
|
||||
|
||||
// GetSystemPromptSource returns the raw configured value — a file path or
|
||||
// inline text — when HasCustomSystemPrompt is true; returns an empty string
|
||||
// when the built-in default prompt is active.
|
||||
func (m *Kit) GetSystemPromptSource() string {
|
||||
return m.systemPromptSource
|
||||
}
|
||||
|
||||
// composeSystemPrompt takes a base system prompt and composes it with the
|
||||
// current runtime context: AGENTS.md content, skills metadata, and date/cwd.
|
||||
// This mirrors the composition done during Kit.New() initialization.
|
||||
@@ -1179,6 +1197,7 @@ func New(ctx context.Context, opts *Options) (*Kit, error) {
|
||||
maxSteps int
|
||||
streaming bool
|
||||
hasCustomSystemPrompt bool
|
||||
systemPromptSource string
|
||||
)
|
||||
|
||||
if err := func() error {
|
||||
@@ -1285,13 +1304,27 @@ func New(ctx context.Context, opts *Options) (*Kit, error) {
|
||||
// explicitly set system-prompt, use the per-model prompt as the
|
||||
// base instead of the global default.
|
||||
{
|
||||
basePrompt := viper.GetString("system-prompt")
|
||||
rawPromptInput := viper.GetString("system-prompt")
|
||||
|
||||
// Resolve a file path to its content so PromptBuilder receives the
|
||||
// actual prompt text rather than a literal path string. Without this,
|
||||
// when system-prompt is set to a file path in the config file or via
|
||||
// --system-prompt, the path itself becomes the effective system prompt
|
||||
// sent to the model (LoadSystemPrompt only ran later, after viper had
|
||||
// been overwritten with the augmented base text).
|
||||
basePrompt, _ := config.LoadSystemPrompt(rawPromptInput)
|
||||
if basePrompt == "" {
|
||||
basePrompt = rawPromptInput
|
||||
}
|
||||
|
||||
// Track whether the user explicitly configured a custom system
|
||||
// prompt. When they haven't (basePrompt is the built-in default
|
||||
// or empty), per-model system prompts can replace it on switch.
|
||||
userSetSystemPrompt := basePrompt != "" && basePrompt != defaultSystemPrompt
|
||||
hasCustomSystemPrompt = userSetSystemPrompt
|
||||
if hasCustomSystemPrompt {
|
||||
systemPromptSource = rawPromptInput
|
||||
}
|
||||
|
||||
// Check for per-model system prompt override when no explicit
|
||||
// global system-prompt was configured by the user.
|
||||
@@ -1500,6 +1533,7 @@ func New(ctx context.Context, opts *Options) (*Kit, error) {
|
||||
opts: opts,
|
||||
mcpConfig: mcpConfig,
|
||||
hasCustomSystemPrompt: hasCustomSystemPrompt,
|
||||
systemPromptSource: systemPromptSource,
|
||||
beforeToolCall: beforeToolCall,
|
||||
afterToolResult: afterToolResult,
|
||||
beforeTurn: beforeTurn,
|
||||
|
||||
@@ -3,6 +3,7 @@ package kit_test
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/spf13/viper"
|
||||
@@ -306,3 +307,92 @@ func TestSessionManagement(t *testing.T) {
|
||||
// resetViper wipes viper's global state so a test case doesn't leak
|
||||
// viper.Set() calls into the next one. Used via defer in subtests.
|
||||
func resetViper() { viper.Reset() }
|
||||
|
||||
// TestNewSystemPromptFilePath is a regression test for issue #25.
|
||||
//
|
||||
// When Options.SystemPrompt (or the --system-prompt flag / config entry) is a
|
||||
// file path, Kit must resolve the path to its file contents *before* the
|
||||
// PromptBuilder composes the runtime context. Previously the path string
|
||||
// itself was used verbatim as the base prompt, so the LLM received the path —
|
||||
// not the prompt — as its system message.
|
||||
func TestNewSystemPromptFilePath(t *testing.T) {
|
||||
if os.Getenv("ANTHROPIC_API_KEY") == "" {
|
||||
t.Skip("Skipping test: ANTHROPIC_API_KEY not set")
|
||||
}
|
||||
defer resetViper()
|
||||
|
||||
const promptContent = "You are a strict regression-test persona. Marker: KIT-25-OK"
|
||||
|
||||
tmpFile, err := os.CreateTemp(t.TempDir(), "kit-system-prompt-*.md")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create temp prompt file: %v", err)
|
||||
}
|
||||
if _, err := tmpFile.WriteString(promptContent); err != nil {
|
||||
t.Fatalf("failed to write temp prompt file: %v", err)
|
||||
}
|
||||
if err := tmpFile.Close(); err != nil {
|
||||
t.Fatalf("failed to close temp prompt file: %v", err)
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
host, err := kit.New(ctx, &kit.Options{
|
||||
Model: "anthropic/claude-sonnet-4-5-20250929",
|
||||
SystemPrompt: tmpFile.Name(),
|
||||
Quiet: true,
|
||||
NoSession: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create Kit with system-prompt file: %v", err)
|
||||
}
|
||||
defer func() { _ = host.Close() }()
|
||||
|
||||
if !host.HasCustomSystemPrompt() {
|
||||
t.Error("HasCustomSystemPrompt() = false; want true when --system-prompt is set")
|
||||
}
|
||||
if got, want := host.GetSystemPromptSource(), tmpFile.Name(); got != want {
|
||||
t.Errorf("GetSystemPromptSource() = %q; want %q", got, want)
|
||||
}
|
||||
|
||||
// The composed system prompt is written back to viper after PromptBuilder
|
||||
// runs. It must contain the file's contents, not the file path.
|
||||
composed := viper.GetString("system-prompt")
|
||||
if !strings.Contains(composed, promptContent) {
|
||||
t.Errorf("composed system-prompt does not contain file contents\n composed = %q\n want substring = %q", composed, promptContent)
|
||||
}
|
||||
if strings.TrimSpace(composed) == tmpFile.Name() {
|
||||
t.Errorf("composed system-prompt is the file path verbatim (%q); LoadSystemPrompt was not applied before PromptBuilder", composed)
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewSystemPromptInline confirms that inline system-prompt strings still
|
||||
// flow through unchanged after the file-path resolution change.
|
||||
func TestNewSystemPromptInline(t *testing.T) {
|
||||
if os.Getenv("ANTHROPIC_API_KEY") == "" {
|
||||
t.Skip("Skipping test: ANTHROPIC_API_KEY not set")
|
||||
}
|
||||
defer resetViper()
|
||||
|
||||
const inline = "You are a concise inline-prompt persona."
|
||||
|
||||
ctx := context.Background()
|
||||
host, err := kit.New(ctx, &kit.Options{
|
||||
Model: "anthropic/claude-sonnet-4-5-20250929",
|
||||
SystemPrompt: inline,
|
||||
Quiet: true,
|
||||
NoSession: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create Kit with inline system-prompt: %v", err)
|
||||
}
|
||||
defer func() { _ = host.Close() }()
|
||||
|
||||
if !host.HasCustomSystemPrompt() {
|
||||
t.Error("HasCustomSystemPrompt() = false; want true for inline prompt")
|
||||
}
|
||||
if got := host.GetSystemPromptSource(); got != inline {
|
||||
t.Errorf("GetSystemPromptSource() = %q; want %q", got, inline)
|
||||
}
|
||||
if composed := viper.GetString("system-prompt"); !strings.Contains(composed, inline) {
|
||||
t.Errorf("composed system-prompt missing inline content; got %q", composed)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
# Specs
|
||||
|
||||
| Spec | Status | Description |
|
||||
|------|--------|-------------|
|
||||
| [unified-bubbletea-architecture](unified-bubbletea-architecture.md) | Draft | Replace micro-program pattern with single Bubble Tea program + thick app layer |
|
||||
Reference in New Issue
Block a user