mirror of
https://github.com/mark3labs/kit.git
synced 2026-06-20 22:26:17 +00:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3bb20f5283 | |||
| 633fa38b2b | |||
| f905cee48c | |||
| 182c10ea1a | |||
| fcaa52bf1c | |||
| 7e6455732c |
@@ -13,6 +13,8 @@
|
||||
// - No channels in maps (Yaegi panics on range over map[string]chan)
|
||||
// - All ctx.* calls guarded with nil checks
|
||||
// - Simple data structures only
|
||||
// - The extension runner serializes handler calls per-extension, so
|
||||
// concurrent subagent events cannot race on this shared state.
|
||||
package main
|
||||
|
||||
import (
|
||||
@@ -43,7 +45,8 @@ const (
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Package-level state - all simple types
|
||||
// Package-level state — safe because the runner serializes all handler
|
||||
// invocations for the same extension (per-extension reentrant mutex).
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
var (
|
||||
@@ -282,8 +285,8 @@ func Init(api ext.API) {
|
||||
|
||||
submonPushWidget()
|
||||
|
||||
// Remove the entry immediately (no goroutine to avoid races)
|
||||
newEntries := submonEntries[:0]
|
||||
// Remove the entry — build a new slice to avoid aliasing bugs
|
||||
newEntries := make([]*submonEntry, 0, len(submonEntries))
|
||||
for _, en := range submonEntries {
|
||||
if en.callID != e.ToolCallID {
|
||||
newEntries = append(newEntries, en)
|
||||
|
||||
@@ -18,7 +18,7 @@ A powerful, extensible AI coding agent CLI with multi-provider support, built-in
|
||||
## Features
|
||||
|
||||
- **Multi-Provider LLM Support**: Anthropic, OpenAI, Google Gemini, Ollama, Azure OpenAI, AWS Bedrock, OpenRouter, and more
|
||||
- **Built-in Core Tools**: bash, read, write, edit, grep, find, ls, subagent - no MCP overhead
|
||||
- **Built-in Core Tools**: bash (with interactive sudo password prompt), read, write, edit, grep, find, ls, subagent - no MCP overhead
|
||||
- **Smart @ Attachments**: Binary files auto-detected via MIME type, MCP resources via `@mcp:server:uri`
|
||||
- **MCP Integration**: Connect external MCP servers for expanded capabilities
|
||||
- **Extension System**: Write custom tools, commands, widgets, and UI modifications in Go
|
||||
|
||||
+1
-1
@@ -297,7 +297,7 @@ func init() {
|
||||
flags.BoolVar(&noPromptTemplates, "no-prompt-templates", false, "disable prompt template discovery")
|
||||
|
||||
// Model generation parameters
|
||||
flags.IntVar(&maxTokens, "max-tokens", 4096, "maximum number of tokens in the response")
|
||||
flags.IntVar(&maxTokens, "max-tokens", 8192, "maximum number of output tokens per response (auto-raised up to 32768 for models with higher known output limits; see internal/models/embedded_models.json)")
|
||||
flags.Float32Var(&temperature, "temperature", 0.7, "controls randomness in responses (0.0-1.0)")
|
||||
flags.Float32Var(&topP, "top-p", 0.95, "controls diversity via nucleus sampling (0.0-1.0)")
|
||||
flags.Int32Var(&topK, "top-k", 40, "controls diversity by limiting top K tokens to sample from")
|
||||
|
||||
@@ -130,6 +130,58 @@ func TestSubagentMonitor_MultipleSubagents(t *testing.T) {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
|
||||
// TestSubagentMonitor_ConcurrentSubagents verifies no panics when multiple
|
||||
// subagents emit events concurrently from different goroutines.
|
||||
func TestSubagentMonitor_ConcurrentSubagents(t *testing.T) {
|
||||
harness := test.New(t)
|
||||
harness.LoadFile("../../.kit/extensions/subagent-monitor.go")
|
||||
|
||||
_, err := harness.Emit(extensions.SessionStartEvent{SessionID: "test-session"})
|
||||
if err != nil {
|
||||
t.Fatalf("SessionStart should not error: %v", err)
|
||||
}
|
||||
|
||||
// Start 5 subagents concurrently
|
||||
done := make(chan struct{}, 5)
|
||||
for i := range 5 {
|
||||
go func(idx int) {
|
||||
defer func() { done <- struct{}{} }()
|
||||
|
||||
callID := fmt.Sprintf("concurrent-%d", idx)
|
||||
task := fmt.Sprintf("concurrent task %d", idx)
|
||||
|
||||
_, _ = harness.Emit(extensions.SubagentStartEvent{
|
||||
ToolCallID: callID,
|
||||
Task: task,
|
||||
})
|
||||
|
||||
// Emit many chunks rapidly
|
||||
for j := range 20 {
|
||||
_, _ = harness.Emit(extensions.SubagentChunkEvent{
|
||||
ToolCallID: callID,
|
||||
Task: task,
|
||||
ChunkType: "text",
|
||||
Content: fmt.Sprintf("agent %d chunk %d", idx, j),
|
||||
})
|
||||
}
|
||||
|
||||
_, _ = harness.Emit(extensions.SubagentEndEvent{
|
||||
ToolCallID: callID,
|
||||
Task: task,
|
||||
Response: "done",
|
||||
})
|
||||
}(i)
|
||||
}
|
||||
|
||||
// Wait for all goroutines
|
||||
for range 5 {
|
||||
<-done
|
||||
}
|
||||
|
||||
// Allow any final processing
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
}
|
||||
|
||||
// TestSubagentMonitor_SessionShutdown verifies shutdown doesn't panic
|
||||
// even with nil ctx functions.
|
||||
func TestSubagentMonitor_SessionShutdown(t *testing.T) {
|
||||
|
||||
@@ -1025,6 +1025,22 @@ func (a *Agent) GetModel() fantasy.LanguageModel {
|
||||
return a.model
|
||||
}
|
||||
|
||||
// GetMaxTokens returns the effective max output tokens the agent currently
|
||||
// sends to the LLM provider, after per-model defaults, right-sizing, and any
|
||||
// Anthropic thinking-budget adjustments. Returns 0 when no ModelConfig is
|
||||
// attached (e.g. early init) or when the provider suppresses the parameter
|
||||
// (e.g. Codex OAuth), which allows callers to differentiate "default" from
|
||||
// "explicitly capped".
|
||||
func (a *Agent) GetMaxTokens() int {
|
||||
if a.skipMaxOutputTokens {
|
||||
return 0
|
||||
}
|
||||
if a.modelConfig == nil {
|
||||
return 0
|
||||
}
|
||||
return a.modelConfig.MaxTokens
|
||||
}
|
||||
|
||||
// Close closes the agent and cleans up resources.
|
||||
// If MCP tools are still loading in the background, Close waits for them
|
||||
// to finish before closing connections to avoid resource leaks.
|
||||
|
||||
@@ -932,6 +932,8 @@ func (a *App) subscribeSDKEvents(sendFn func(tea.Msg), stepUsageSeen *atomic.Boo
|
||||
Password: resp.Password,
|
||||
Cancelled: resp.Cancelled,
|
||||
}
|
||||
case kit.TurnEndEvent:
|
||||
a.handleTurnEnd(ev, sendFn)
|
||||
}
|
||||
}))
|
||||
|
||||
@@ -942,6 +944,64 @@ func (a *App) subscribeSDKEvents(sendFn func(tea.Msg), stepUsageSeen *atomic.Boo
|
||||
}
|
||||
}
|
||||
|
||||
// handleTurnEnd inspects a turn's final StopReason and surfaces actionable
|
||||
// feedback to the user when the turn ended in a state they can act on.
|
||||
//
|
||||
// Today the only surfaced case is FinishReasonLength — the model hit its
|
||||
// configured max_output_tokens budget and the reply was truncated. Without
|
||||
// this banner the TUI used to swallow the truncation silently, leading to
|
||||
// "ghost" cut-offs with no indication of why.
|
||||
//
|
||||
// Separated from subscribeSDKEvents so tests can exercise it directly via a
|
||||
// stubbed sendFn without standing up a full Kit.
|
||||
func (a *App) handleTurnEnd(ev kit.TurnEndEvent, sendFn func(tea.Msg)) {
|
||||
if sendFn == nil {
|
||||
return
|
||||
}
|
||||
if ev.StopReason != kit.FinishReasonLength {
|
||||
return
|
||||
}
|
||||
sendFn(ExtensionPrintEvent{
|
||||
Level: "info",
|
||||
Text: a.formatMaxTokensTruncatedMessage(),
|
||||
})
|
||||
}
|
||||
|
||||
// formatMaxTokensTruncatedMessage builds the user-facing explanation for a
|
||||
// truncated turn. It reports the active max_output_tokens budget and, when
|
||||
// known, the model's catalog output ceiling so the user can judge how much
|
||||
// headroom is available.
|
||||
func (a *App) formatMaxTokensTruncatedMessage() string {
|
||||
k := a.opts.Kit
|
||||
if k == nil {
|
||||
// Extremely early / test-stub case: still emit a useful generic hint.
|
||||
return "⚠ Response truncated: the model hit the configured max_output_tokens limit. " +
|
||||
"Raise it with --max-tokens N, KIT_MAX_TOKENS=N, or per-model " +
|
||||
"modelSettings[provider/model].maxTokens in config."
|
||||
}
|
||||
current := k.MaxTokens()
|
||||
ceiling := k.MaxOutputLimit()
|
||||
model := k.GetModelString()
|
||||
|
||||
msg := "⚠ Response truncated: "
|
||||
if model != "" {
|
||||
msg += fmt.Sprintf("%s hit the configured max_output_tokens limit", model)
|
||||
} else {
|
||||
msg += "the model hit the configured max_output_tokens limit"
|
||||
}
|
||||
if current > 0 {
|
||||
msg += fmt.Sprintf(" (%d)", current)
|
||||
}
|
||||
msg += "."
|
||||
if ceiling > 0 && current > 0 && ceiling > current {
|
||||
msg += fmt.Sprintf(" This model supports up to %d output tokens.", ceiling)
|
||||
}
|
||||
msg += "\n\nRaise it with --max-tokens N, KIT_MAX_TOKENS=N, " +
|
||||
"or per-model modelSettings[provider/model].maxTokens in your config. " +
|
||||
"Re-run the last prompt after raising it to get the full response."
|
||||
return msg
|
||||
}
|
||||
|
||||
// QuitFromExtension triggers a graceful shutdown. In interactive mode it
|
||||
// sends a tea.QuitMsg to the program so the TUI exits cleanly. In
|
||||
// non-interactive mode it cancels the root context, stopping any in-flight
|
||||
|
||||
@@ -3,10 +3,12 @@ package app
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
tea "charm.land/bubbletea/v2"
|
||||
kit "github.com/mark3labs/kit/pkg/kit"
|
||||
)
|
||||
|
||||
@@ -666,3 +668,94 @@ func TestUpdateUsageFromTurnResult_contextTokensUsesAllCategories(t *testing.T)
|
||||
expected, usage.contextCalls, usage.lastContextTokens)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHandleTurnEnd_LengthEmitsWarning verifies that when the SDK reports a
|
||||
// FinishReasonLength (max_output_tokens hit), the app surfaces a user-visible
|
||||
// ExtensionPrintEvent with Level="info" so the TUI can render a banner
|
||||
// instead of silently showing a truncated reply.
|
||||
func TestHandleTurnEnd_LengthEmitsWarning(t *testing.T) {
|
||||
app := New(Options{}, nil)
|
||||
defer app.Close()
|
||||
|
||||
var mu sync.Mutex
|
||||
var received []tea.Msg
|
||||
sendFn := func(m tea.Msg) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
received = append(received, m)
|
||||
}
|
||||
|
||||
app.handleTurnEnd(kit.TurnEndEvent{StopReason: kit.FinishReasonLength}, sendFn)
|
||||
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if len(received) != 1 {
|
||||
t.Fatalf("expected 1 event on length stop, got %d", len(received))
|
||||
}
|
||||
ev, ok := received[0].(ExtensionPrintEvent)
|
||||
if !ok {
|
||||
t.Fatalf("expected ExtensionPrintEvent, got %T", received[0])
|
||||
}
|
||||
if ev.Level != "info" {
|
||||
t.Errorf("expected Level=info, got %q", ev.Level)
|
||||
}
|
||||
if ev.Text == "" {
|
||||
t.Error("expected non-empty warning text")
|
||||
}
|
||||
if !strings.Contains(ev.Text, "max_output_tokens") {
|
||||
t.Errorf("warning text should mention max_output_tokens, got: %s", ev.Text)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHandleTurnEnd_NonLengthIgnored verifies that ordinary stop reasons
|
||||
// (stop, tool-calls, error, unknown, "") do not produce a warning banner.
|
||||
func TestHandleTurnEnd_NonLengthIgnored(t *testing.T) {
|
||||
app := New(Options{}, nil)
|
||||
defer app.Close()
|
||||
|
||||
reasons := []string{
|
||||
kit.FinishReasonStop,
|
||||
kit.FinishReasonToolCalls,
|
||||
kit.FinishReasonError,
|
||||
kit.FinishReasonContentFilter,
|
||||
kit.FinishReasonOther,
|
||||
kit.FinishReasonUnknown,
|
||||
"",
|
||||
}
|
||||
for _, r := range reasons {
|
||||
var called bool
|
||||
app.handleTurnEnd(kit.TurnEndEvent{StopReason: r}, func(m tea.Msg) {
|
||||
called = true
|
||||
})
|
||||
if called {
|
||||
t.Errorf("stop reason %q unexpectedly emitted a warning", r)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestHandleTurnEnd_NilSendFn guards against panics when no TUI listener is
|
||||
// attached (e.g. early init or headless teardown).
|
||||
func TestHandleTurnEnd_NilSendFn(t *testing.T) {
|
||||
app := New(Options{}, nil)
|
||||
defer app.Close()
|
||||
|
||||
// Should not panic with a nil sendFn.
|
||||
app.handleTurnEnd(kit.TurnEndEvent{StopReason: kit.FinishReasonLength}, nil)
|
||||
}
|
||||
|
||||
// TestFormatMaxTokensTruncatedMessage_NoKit verifies the fallback message
|
||||
// when Options.Kit is nil (test/stub path).
|
||||
func TestFormatMaxTokensTruncatedMessage_NoKit(t *testing.T) {
|
||||
app := New(Options{}, nil)
|
||||
defer app.Close()
|
||||
|
||||
msg := app.formatMaxTokensTruncatedMessage()
|
||||
if msg == "" {
|
||||
t.Fatal("expected non-empty fallback message")
|
||||
}
|
||||
for _, needle := range []string{"max_output_tokens", "--max-tokens", "KIT_MAX_TOKENS", "modelSettings"} {
|
||||
if !strings.Contains(msg, needle) {
|
||||
t.Errorf("fallback message missing %q:\n%s", needle, msg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,21 +1,93 @@
|
||||
package extensions
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/spf13/viper"
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// reentrantMu — a per-extension mutex that allows the same goroutine to
|
||||
// re-enter (e.g. handler → ctx.EmitCustomEvent → handler in same extension).
|
||||
// Different goroutines are serialized, preventing concurrent state mutation.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// reentrantMu is a mutex that the goroutine currently holding it may acquire
// again without blocking; other goroutines wait until it is fully released.
// mu guards owner and depth, and cond is signalled whenever the lock becomes
// free. The zero value is not usable: call init first.
type reentrantMu struct {
	mu    sync.Mutex
	cond  *sync.Cond
	owner int64 // goroutine ID that holds the lock, or 0 when free
	depth int   // re-entrancy depth: outstanding lock calls by owner
}

// init prepares the reentrant mutex for use in-place. It must be called once
// the struct is at its final memory location (not before copying), because
// cond keeps a pointer to r.mu.
func (r *reentrantMu) init() {
	r.cond = sync.NewCond(&r.mu)
}

// lock acquires the mutex. If the calling goroutine already holds it, the
// call succeeds immediately and only bumps the re-entrancy depth. Every call
// to lock must be paired with a call to unlock from the same goroutine.
func (r *reentrantMu) lock() {
	gid := goroutineID()

	r.mu.Lock()
	defer r.mu.Unlock()

	if r.owner == gid {
		// Re-entrant: same goroutine already holds the lock.
		r.depth++
		return
	}
	// Wait for the current owner to release. Wait drops mu while blocked
	// and re-acquires it before returning, so owner is re-checked under mu.
	for r.owner != 0 {
		r.cond.Wait()
	}
	r.owner = gid
	r.depth = 1
}

// unlock undoes one lock call. When the depth drops to zero the mutex is
// released and one waiting goroutine, if any, is woken.
func (r *reentrantMu) unlock() {
	r.mu.Lock()
	defer r.mu.Unlock()

	r.depth--
	if r.depth == 0 {
		r.owner = 0
		r.cond.Signal()
	}
}

// goroutineID extracts the current goroutine's ID from the first line of
// runtime.Stack output, which has the form "goroutine NNN [status]:".
//
// The returned ID is always positive. That matters because reentrantMu uses
// 0 to mean "no owner": an ID of 0 would make lock treat a free mutex as a
// re-entrant acquire and silently break mutual exclusion. If the header does
// not have the expected shape, goroutineID therefore panics with a
// descriptive message instead of returning 0 or failing with an opaque
// slice-bounds panic.
func goroutineID() int64 {
	const prefix = "goroutine "

	var buf [64]byte
	header := buf[:runtime.Stack(buf[:], false)]

	if bytes.HasPrefix(header, []byte(prefix)) {
		digits := header[len(prefix):]
		if end := bytes.IndexByte(digits, ' '); end > 0 {
			id, err := strconv.ParseInt(string(digits[:end]), 10, 64)
			if err == nil && id > 0 {
				return id
			}
		}
	}
	panic(fmt.Sprintf("extensions: cannot parse goroutine ID from stack header %q", header))
}
|
||||
|
||||
// Runner manages loaded extensions and dispatches events to their handlers
|
||||
// sequentially. Handlers execute in extension
|
||||
// load order; for cancellable events the first blocking result wins.
|
||||
//
|
||||
// Each extension has a dedicated reentrant mutex so that handlers for the
|
||||
// same extension are serialized (preventing data races on shared package-level
|
||||
// state), while handlers for different extensions may execute concurrently.
|
||||
type Runner struct {
|
||||
extensions []LoadedExtension
|
||||
extMu []reentrantMu // per-extension reentrant mutex, indexed by extension position
|
||||
ctx Context
|
||||
widgets map[string]WidgetConfig // keyed by widget ID
|
||||
statusEntries map[string]StatusBarEntry // keyed by status key
|
||||
@@ -52,7 +124,11 @@ type LoadedExtension struct {
|
||||
|
||||
// NewRunner creates a Runner from a set of loaded extensions.
|
||||
func NewRunner(exts []LoadedExtension) *Runner {
|
||||
return &Runner{extensions: exts}
|
||||
mus := make([]reentrantMu, len(exts))
|
||||
for i := range mus {
|
||||
mus[i].init()
|
||||
}
|
||||
return &Runner{extensions: exts, extMu: mus}
|
||||
}
|
||||
|
||||
// SetContext updates the runtime context (session ID, model, etc.) that is
|
||||
@@ -367,6 +443,11 @@ func (r *Runner) Emit(event Event) (Result, error) {
|
||||
for i := range r.extensions {
|
||||
ext := &r.extensions[i]
|
||||
handlers := ext.Handlers[event.Type()]
|
||||
if len(handlers) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
r.extMu[i].lock()
|
||||
for _, handler := range handlers {
|
||||
result, err := safeCall(handler, event, ctx)
|
||||
if err != nil {
|
||||
@@ -379,6 +460,7 @@ func (r *Runner) Emit(event Event) (Result, error) {
|
||||
|
||||
// Check for blocking/short-circuit results.
|
||||
if isBlocking(result) {
|
||||
r.extMu[i].unlock()
|
||||
return result, nil
|
||||
}
|
||||
|
||||
@@ -386,6 +468,7 @@ func (r *Runner) Emit(event Event) (Result, error) {
|
||||
// the caller is responsible for applying the modifications.
|
||||
accumulated = result
|
||||
}
|
||||
r.extMu[i].unlock()
|
||||
}
|
||||
return accumulated, nil
|
||||
}
|
||||
@@ -712,11 +795,17 @@ func (r *Runner) EmitCustomEvent(name, data string) {
|
||||
|
||||
// Extension-registered handlers first (in load order).
|
||||
for i := range r.extensions {
|
||||
for _, h := range r.extensions[i].CustomEventHandlers[name] {
|
||||
extHandlers := r.extensions[i].CustomEventHandlers[name]
|
||||
if len(extHandlers) == 0 {
|
||||
continue
|
||||
}
|
||||
r.extMu[i].lock()
|
||||
for _, h := range extHandlers {
|
||||
safeInvoke(h)
|
||||
}
|
||||
r.extMu[i].unlock()
|
||||
}
|
||||
// Then dynamic subscriptions.
|
||||
// Then dynamic subscriptions (not extension-scoped, no per-ext lock).
|
||||
for _, h := range dynamicHandlers {
|
||||
safeInvoke(h)
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package extensions
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -571,3 +572,142 @@ func TestRunner_ContextPrintNilSafe(t *testing.T) {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunner_ConcurrentEmitSameExtension(t *testing.T) {
|
||||
// Verify that concurrent Emit calls for the same extension are serialized
|
||||
// and don't cause data races on shared handler state.
|
||||
var counter int
|
||||
ext := makeHandlerExt("shared-state.go", map[EventType][]HandlerFunc{
|
||||
SubagentStart: {
|
||||
func(e Event, c Context) Result {
|
||||
// Read-modify-write: racy without serialization.
|
||||
v := counter
|
||||
counter = v + 1
|
||||
return nil
|
||||
},
|
||||
},
|
||||
SubagentChunk: {
|
||||
func(e Event, c Context) Result {
|
||||
v := counter
|
||||
counter = v + 1
|
||||
return nil
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
r := makeRunner(ext)
|
||||
var wg sync.WaitGroup
|
||||
const goroutines = 20
|
||||
const iterations = 50
|
||||
wg.Add(goroutines)
|
||||
for range goroutines {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for range iterations {
|
||||
_, _ = r.Emit(SubagentStartEvent{ToolCallID: "x"})
|
||||
_, _ = r.Emit(SubagentChunkEvent{ToolCallID: "x"})
|
||||
}
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
if counter != goroutines*iterations*2 {
|
||||
t.Errorf("expected counter=%d, got %d (race detected)", goroutines*iterations*2, counter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunner_ConcurrentEmitDifferentExtensions(t *testing.T) {
|
||||
// Two extensions with independent state should not block each other
|
||||
// and should both run correctly under concurrent Emit calls.
|
||||
var counter1, counter2 int
|
||||
ext1 := makeHandlerExt("ext1.go", map[EventType][]HandlerFunc{
|
||||
SubagentStart: {
|
||||
func(e Event, c Context) Result {
|
||||
v := counter1
|
||||
counter1 = v + 1
|
||||
return nil
|
||||
},
|
||||
},
|
||||
})
|
||||
ext2 := makeHandlerExt("ext2.go", map[EventType][]HandlerFunc{
|
||||
SubagentStart: {
|
||||
func(e Event, c Context) Result {
|
||||
v := counter2
|
||||
counter2 = v + 1
|
||||
return nil
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
r := makeRunner(ext1, ext2)
|
||||
var wg sync.WaitGroup
|
||||
const goroutines = 20
|
||||
const iterations = 50
|
||||
wg.Add(goroutines)
|
||||
for range goroutines {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for range iterations {
|
||||
_, _ = r.Emit(SubagentStartEvent{ToolCallID: "x"})
|
||||
}
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
expected := goroutines * iterations
|
||||
if counter1 != expected {
|
||||
t.Errorf("ext1 counter: expected %d, got %d", expected, counter1)
|
||||
}
|
||||
if counter2 != expected {
|
||||
t.Errorf("ext2 counter: expected %d, got %d", expected, counter2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunner_ReentrantEmitCustomEvent(t *testing.T) {
|
||||
// Verify that a handler can call EmitCustomEvent (which dispatches to
|
||||
// the same extension's custom event handlers) without deadlocking.
|
||||
var order []string
|
||||
ext := LoadedExtension{
|
||||
Path: "reentrant.go",
|
||||
Handlers: map[EventType][]HandlerFunc{
|
||||
SessionStart: {
|
||||
func(e Event, c Context) Result {
|
||||
order = append(order, "session_start")
|
||||
// This triggers EmitCustomEvent for the same extension
|
||||
// via a direct runner call (simulating ctx.EmitCustomEvent).
|
||||
return nil
|
||||
},
|
||||
},
|
||||
},
|
||||
CustomEventHandlers: map[string][]func(string){
|
||||
"test-event": {
|
||||
func(data string) {
|
||||
order = append(order, "custom:"+data)
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
r := makeRunner(ext)
|
||||
|
||||
// Wire up the handler to call EmitCustomEvent re-entrantly.
|
||||
ext.Handlers[SessionStart] = []HandlerFunc{
|
||||
func(e Event, c Context) Result {
|
||||
order = append(order, "session_start")
|
||||
r.EmitCustomEvent("test-event", "hello")
|
||||
return nil
|
||||
},
|
||||
}
|
||||
r.extensions[0] = ext
|
||||
// Rebuild mutexes after modifying extensions slice.
|
||||
r.extMu = make([]reentrantMu, len(r.extensions))
|
||||
for i := range r.extMu {
|
||||
r.extMu[i].init()
|
||||
}
|
||||
|
||||
_, err := r.Emit(SessionStartEvent{})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if len(order) != 2 || order[0] != "session_start" || order[1] != "custom:hello" {
|
||||
t.Errorf("expected [session_start, custom:hello], got %v", order)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -251,6 +251,11 @@ func CreateProvider(ctx context.Context, config *ProviderConfig) (*ProviderResul
|
||||
// via CLI flag or global config.
|
||||
ApplyModelSettings(config, modelInfo)
|
||||
|
||||
// Auto-raise MaxTokens toward the model's known output ceiling when the
|
||||
// user hasn't explicitly set --max-tokens and no per-model override
|
||||
// applied. Runs after ApplyModelSettings so explicit modelSettings win.
|
||||
rightSizeMaxTokens(config, modelInfo)
|
||||
|
||||
// Create the base provider
|
||||
var result *ProviderResult
|
||||
var createErr error
|
||||
@@ -489,6 +494,37 @@ func validateModelConfig(config *ProviderConfig, modelInfo *ModelInfo) {
|
||||
}
|
||||
}
|
||||
|
||||
// defaultRightSizeCap bounds auto-raised MaxTokens so that we don't silently
|
||||
// allocate enormous output budgets for models with very high ceilings (e.g.
|
||||
// Devstral at 262144, Mistral at 128000). Users who genuinely want more can
|
||||
// pass --max-tokens explicitly or set modelSettings[...].maxTokens in config.
|
||||
const defaultRightSizeCap = 32768
|
||||
|
||||
// rightSizeMaxTokens raises config.MaxTokens toward the model's known output
|
||||
// ceiling when:
|
||||
// - the user has not explicitly set --max-tokens (or the KIT_MAX_TOKENS env
|
||||
// var, or the top-level max-tokens key in config.yaml), AND
|
||||
// - no per-model override already bumped MaxTokens (ApplyModelSettings runs
|
||||
// before this function), AND
|
||||
// - modelInfo.Limit.Output is known and larger than the current MaxTokens.
|
||||
//
|
||||
// The raised value is capped at defaultRightSizeCap to keep accidental
|
||||
// allocations reasonable on very-large-output models. This prevents the
|
||||
// common "ghost" where the agent's reply is silently truncated at the 8192
|
||||
// default even though the selected model supports 64k or 262k output tokens.
|
||||
func rightSizeMaxTokens(config *ProviderConfig, modelInfo *ModelInfo) {
|
||||
if modelInfo == nil || modelInfo.Limit.Output <= 0 {
|
||||
return
|
||||
}
|
||||
if isExplicitlySet("max-tokens") {
|
||||
return
|
||||
}
|
||||
target := min(modelInfo.Limit.Output, defaultRightSizeCap)
|
||||
if config.MaxTokens < target {
|
||||
config.MaxTokens = target
|
||||
}
|
||||
}
|
||||
|
||||
// clearConflictingAnthropicSamplingParams ensures that temperature and top_p are
|
||||
// not both sent to the Anthropic API, which rejects requests containing both.
|
||||
// When both are set (typically from defaults), top_p is cleared so that
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
package models
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/spf13/pflag"
|
||||
"github.com/spf13/viper"
|
||||
)
|
||||
|
||||
// bindMaxTokensFlag wires a fresh pflag-backed "max-tokens" key into viper so
|
||||
// isExplicitlySet behaves the same way it does in production. Returns a
|
||||
// cleanup function that removes the binding so sibling tests see a clean
|
||||
// state.
|
||||
func bindMaxTokensFlag(t *testing.T, args []string) func() {
|
||||
t.Helper()
|
||||
fs := pflag.NewFlagSet("test", pflag.ContinueOnError)
|
||||
fs.Int("max-tokens", 8192, "")
|
||||
if err := viper.BindPFlag("max-tokens", fs.Lookup("max-tokens")); err != nil {
|
||||
t.Fatalf("BindPFlag: %v", err)
|
||||
}
|
||||
if err := fs.Parse(args); err != nil {
|
||||
t.Fatalf("fs.Parse: %v", err)
|
||||
}
|
||||
return func() {
|
||||
viper.Reset()
|
||||
}
|
||||
}
|
||||
|
||||
func TestRightSizeMaxTokens_RaisesWhenBelowCeiling(t *testing.T) {
|
||||
cleanup := bindMaxTokensFlag(t, nil) // no args → flag.Changed = false
|
||||
defer cleanup()
|
||||
|
||||
config := &ProviderConfig{MaxTokens: 8192}
|
||||
modelInfo := &ModelInfo{
|
||||
ID: "claude-sonnet-4-5",
|
||||
Limit: Limit{Context: 200000, Output: 64000},
|
||||
}
|
||||
|
||||
rightSizeMaxTokens(config, modelInfo)
|
||||
|
||||
if config.MaxTokens != 32768 {
|
||||
t.Errorf("expected MaxTokens raised to defaultRightSizeCap (32768), got %d", config.MaxTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRightSizeMaxTokens_CapsAtDefaultRightSizeCap(t *testing.T) {
|
||||
cleanup := bindMaxTokensFlag(t, nil)
|
||||
defer cleanup()
|
||||
|
||||
config := &ProviderConfig{MaxTokens: 8192}
|
||||
// Mistral Devstral has 262144 output — we should still cap at 32768.
|
||||
modelInfo := &ModelInfo{
|
||||
ID: "devstral-medium-latest",
|
||||
Limit: Limit{Context: 262144, Output: 262144},
|
||||
}
|
||||
|
||||
rightSizeMaxTokens(config, modelInfo)
|
||||
|
||||
if config.MaxTokens != defaultRightSizeCap {
|
||||
t.Errorf("expected MaxTokens capped at %d, got %d", defaultRightSizeCap, config.MaxTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRightSizeMaxTokens_UsesExactOutputWhenBelowCap(t *testing.T) {
|
||||
cleanup := bindMaxTokensFlag(t, nil)
|
||||
defer cleanup()
|
||||
|
||||
config := &ProviderConfig{MaxTokens: 4096}
|
||||
// Model with output limit smaller than the cap.
|
||||
modelInfo := &ModelInfo{
|
||||
ID: "gpt-4",
|
||||
Limit: Limit{Context: 8192, Output: 8192},
|
||||
}
|
||||
|
||||
rightSizeMaxTokens(config, modelInfo)
|
||||
|
||||
if config.MaxTokens != 8192 {
|
||||
t.Errorf("expected MaxTokens raised to model output ceiling (8192), got %d", config.MaxTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRightSizeMaxTokens_DoesNotLowerCurrentValue(t *testing.T) {
|
||||
cleanup := bindMaxTokensFlag(t, nil)
|
||||
defer cleanup()
|
||||
|
||||
// User (via per-model settings, applied earlier) already bumped MaxTokens
|
||||
// above the cap — we must not clobber their choice.
|
||||
config := &ProviderConfig{MaxTokens: 100000}
|
||||
modelInfo := &ModelInfo{
|
||||
ID: "devstral-medium-latest",
|
||||
Limit: Limit{Context: 262144, Output: 262144},
|
||||
}
|
||||
|
||||
rightSizeMaxTokens(config, modelInfo)
|
||||
|
||||
if config.MaxTokens != 100000 {
|
||||
t.Errorf("expected MaxTokens preserved at 100000, got %d", config.MaxTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRightSizeMaxTokens_RespectsExplicitFlag(t *testing.T) {
|
||||
// Simulate `--max-tokens 4096` on the command line.
|
||||
cleanup := bindMaxTokensFlag(t, []string{"--max-tokens", "4096"})
|
||||
defer cleanup()
|
||||
|
||||
config := &ProviderConfig{MaxTokens: 4096}
|
||||
modelInfo := &ModelInfo{
|
||||
ID: "claude-sonnet-4-5",
|
||||
Limit: Limit{Context: 200000, Output: 64000},
|
||||
}
|
||||
|
||||
rightSizeMaxTokens(config, modelInfo)
|
||||
|
||||
if config.MaxTokens != 4096 {
|
||||
t.Errorf("expected explicit --max-tokens to be preserved (4096), got %d", config.MaxTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRightSizeMaxTokens_NilModelInfo(t *testing.T) {
|
||||
cleanup := bindMaxTokensFlag(t, nil)
|
||||
defer cleanup()
|
||||
|
||||
config := &ProviderConfig{MaxTokens: 8192}
|
||||
// Custom model / Ollama / unknown provider → no model info.
|
||||
rightSizeMaxTokens(config, nil)
|
||||
|
||||
if config.MaxTokens != 8192 {
|
||||
t.Errorf("expected MaxTokens unchanged with nil modelInfo, got %d", config.MaxTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRightSizeMaxTokens_ZeroOutputLimit(t *testing.T) {
|
||||
cleanup := bindMaxTokensFlag(t, nil)
|
||||
defer cleanup()
|
||||
|
||||
config := &ProviderConfig{MaxTokens: 8192}
|
||||
// Model present in catalog but with no known output limit.
|
||||
modelInfo := &ModelInfo{
|
||||
ID: "unknown-model",
|
||||
Limit: Limit{Context: 0, Output: 0},
|
||||
}
|
||||
|
||||
rightSizeMaxTokens(config, modelInfo)
|
||||
|
||||
if config.MaxTokens != 8192 {
|
||||
t.Errorf("expected MaxTokens unchanged with zero output limit, got %d", config.MaxTokens)
|
||||
}
|
||||
}
|
||||
@@ -69,30 +69,6 @@ func TestInputComponent_SubmitEmitsSubmitMsg(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestInputComponent_CtrlD_SubmitEmitsSubmitMsg verifies that ctrl+d also
|
||||
// submits the text.
|
||||
func TestInputComponent_CtrlD_SubmitEmitsSubmitMsg(t *testing.T) {
|
||||
ctrl := &stubAppController{}
|
||||
c := newTestInput(ctrl)
|
||||
|
||||
c.textarea.SetValue("ctrl+d submit")
|
||||
c.lastValue = "ctrl+d submit"
|
||||
|
||||
_, cmd := sendInputMsg(c, tea.KeyPressMsg{Code: 'd', Mod: tea.ModCtrl})
|
||||
|
||||
msg := runCmd(cmd)
|
||||
if msg == nil {
|
||||
t.Fatal("expected a cmd from ctrl+d on non-empty input")
|
||||
}
|
||||
sm, ok := msg.(core.SubmitMsg)
|
||||
if !ok {
|
||||
t.Fatalf("expected submitMsg from ctrl+d, got %T", msg)
|
||||
}
|
||||
if sm.Text != "ctrl+d submit" {
|
||||
t.Fatalf("expected Text='ctrl+d submit', got %q", sm.Text)
|
||||
}
|
||||
}
|
||||
|
||||
// TestInputComponent_EmptySubmit_NoCmd verifies that submitting an empty or
|
||||
// whitespace-only string produces no cmd.
|
||||
func TestInputComponent_EmptySubmit_NoCmd(t *testing.T) {
|
||||
|
||||
+20
-4
@@ -201,7 +201,7 @@ func (s *InputComponent) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
case tea.KeyPressMsg:
|
||||
if !s.showPopup {
|
||||
switch msg.String() {
|
||||
case "ctrl+d", "enter":
|
||||
case "enter":
|
||||
value := s.textarea.Value()
|
||||
s.pushHistory(value)
|
||||
s.textarea.SetValue("")
|
||||
@@ -708,9 +708,25 @@ func (s *InputComponent) renderPopupWithOptions(centered bool) string {
|
||||
}
|
||||
content = indicator + displayName
|
||||
} else {
|
||||
nameWidth := 15
|
||||
if innerWidth < 25 {
|
||||
nameWidth = max(innerWidth*2/5+1, 8)
|
||||
// Compute nameWidth from the longest command name in the
|
||||
// visible slice so we never truncate unnecessarily.
|
||||
nameWidth := 0
|
||||
for _, fm := range s.filtered {
|
||||
if n := len([]rune(fm.Command.Name)); n > nameWidth {
|
||||
nameWidth = n
|
||||
}
|
||||
}
|
||||
nameWidth += 3 // account for indicator prefix (2) + gap before description (1)
|
||||
// Ensure descriptions still get at least 20 chars when possible.
|
||||
maxForName := innerWidth - 20
|
||||
if maxForName < 8 {
|
||||
maxForName = innerWidth * 2 / 3
|
||||
}
|
||||
if nameWidth > maxForName {
|
||||
nameWidth = maxForName
|
||||
}
|
||||
if nameWidth < 8 {
|
||||
nameWidth = 8
|
||||
}
|
||||
maxNameChars := nameWidth - 2
|
||||
displayName := sc.Name
|
||||
|
||||
+12
-9
@@ -1318,11 +1318,11 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.scrollList.autoScroll = true
|
||||
}
|
||||
return m, tea.Batch(cmds...)
|
||||
case "alt+home":
|
||||
case "ctrl+home":
|
||||
m.scrollList.GotoTop()
|
||||
m.scrollList.autoScroll = false
|
||||
return m, tea.Batch(cmds...)
|
||||
case "alt+end":
|
||||
case "ctrl+end":
|
||||
m.scrollList.GotoBottom()
|
||||
m.scrollList.autoScroll = true
|
||||
return m, tea.Batch(cmds...)
|
||||
@@ -1330,15 +1330,10 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
}
|
||||
|
||||
// Thinking keybindings — only when the model supports reasoning.
|
||||
// Note: thinking visibility toggle is under leader chord (Ctrl+X t)
|
||||
// to avoid conflicts with terminal multiplexers.
|
||||
if m.isReasoningModel {
|
||||
switch msg.String() {
|
||||
case "ctrl+t":
|
||||
// Toggle thinking block visibility.
|
||||
m.thinkingVisible = !m.thinkingVisible
|
||||
if m.stream != nil {
|
||||
m.stream.SetThinkingVisible(m.thinkingVisible)
|
||||
}
|
||||
return m, tea.Batch(cmds...)
|
||||
case "shift+tab":
|
||||
// Cycle thinking level.
|
||||
m.cycleThinkingLevel()
|
||||
@@ -1439,6 +1434,14 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
}
|
||||
}
|
||||
}
|
||||
case "t":
|
||||
// Ctrl+X t → Toggle thinking block visibility.
|
||||
if m.isReasoningModel {
|
||||
m.thinkingVisible = !m.thinkingVisible
|
||||
if m.stream != nil {
|
||||
m.stream.SetThinkingVisible(m.thinkingVisible)
|
||||
}
|
||||
}
|
||||
case "e":
|
||||
// Ctrl+X e → open $EDITOR to compose/edit the prompt.
|
||||
editorApp := os.Getenv("VISUAL")
|
||||
|
||||
@@ -588,8 +588,10 @@ func formatToolExecutionMessage(toolName string) string {
|
||||
return toolName
|
||||
}
|
||||
|
||||
// UpdateTheme refreshes the component's typography instance with colors from
|
||||
// the current theme. This is called when the user changes themes via /theme.
|
||||
// UpdateTheme refreshes the component's typography instance and spinner
|
||||
// animation frames with colors from the current theme. This is called when
|
||||
// the user changes themes via /theme.
|
||||
func (s *StreamComponent) UpdateTheme() {
|
||||
s.ty = createTypography(GetTheme())
|
||||
s.spinnerFrames = knightRiderFrames()
|
||||
}
|
||||
|
||||
@@ -200,10 +200,6 @@ func (ts *TreeSelectorComponent) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
case key.Matches(msg, key.NewBinding(key.WithKeys("ctrl+l"))):
|
||||
ts.filter = TreeFilterLabelOnly
|
||||
ts.rebuildFlatList()
|
||||
case key.Matches(msg, key.NewBinding(key.WithKeys("ctrl+a"))):
|
||||
ts.filter = TreeFilterAll
|
||||
ts.rebuildFlatList()
|
||||
|
||||
default:
|
||||
// Typing search.
|
||||
if msg.Text != "" && len(msg.Text) == 1 {
|
||||
|
||||
+39
-3
@@ -110,6 +110,38 @@ func parseToolArgs(toolArgs string) map[string]any {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Finish reason constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Finish reasons reported by the LLM provider on a completed turn. These
// mirror fantasy.FinishReason string values so comparisons against
// TurnEndEvent.StopReason / TurnResult.StopReason are stable across
// providers.
const (
	// FinishReasonStop means the model produced a natural stop (e.g. a stop
	// sequence or end-of-turn signal).
	FinishReasonStop = "stop"

	// FinishReasonLength means the model hit the configured
	// max_output_tokens budget and the response is truncated. Surface this
	// to the user and consider raising --max-tokens / KIT_MAX_TOKENS /
	// modelSettings[...].maxTokens.
	FinishReasonLength = "length"

	// FinishReasonToolCalls means the model stopped to emit tool calls (the
	// normal mid-turn state during agentic loops).
	FinishReasonToolCalls = "tool-calls"

	// FinishReasonContentFilter means the provider's safety filter stopped
	// generation.
	FinishReasonContentFilter = "content-filter"

	// FinishReasonError means the model stopped because of an error.
	FinishReasonError = "error"

	// FinishReasonOther is a provider-specific reason that doesn't map to
	// any of the above.
	FinishReasonOther = "other"

	// FinishReasonUnknown means the provider didn't report a finish reason.
	FinishReasonUnknown = "unknown"
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Concrete event structs
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -124,9 +156,13 @@ func (e TurnStartEvent) EventType() EventType { return EventTurnStart }
|
||||
|
||||
// TurnEndEvent fires after the agent finishes processing.
type TurnEndEvent struct {
	Response string
	Error    error

	// StopReason is the LLM provider's finish reason for the final step of
	// the turn. Compare against the FinishReason* constants — in particular,
	// FinishReasonLength indicates the response was truncated because the
	// agent hit its max_output_tokens budget.
	StopReason string
}
|
||||
|
||||
// EventType implements Event.
|
||||
|
||||
+49
-5
@@ -51,6 +51,7 @@ type Kit struct {
|
||||
bufferedLogger *tools.BufferedDebugLogger
|
||||
authHandler MCPAuthHandler // OAuth handler for remote MCP servers (may need Close)
|
||||
opts *Options // stored for reload operations (skills, etc.)
|
||||
mcpConfig *config.Config // loaded MCP/server config, shared with subagents
|
||||
|
||||
// hasCustomSystemPrompt is true when the user explicitly configured a
|
||||
// system prompt (via --system-prompt flag, config file, or SDK option).
|
||||
@@ -849,6 +850,13 @@ type Options struct {
|
||||
// (e.g. AGENTS.md) from the working directory.
|
||||
NoContextFiles bool
|
||||
|
||||
// MCPConfig provides a pre-loaded MCP configuration. When set,
|
||||
// LoadAndValidateConfig is skipped during Kit creation — avoiding
|
||||
// viper access entirely. This is set automatically for in-process
|
||||
// subagents (inheriting the parent's loaded config) and can be used
|
||||
// by SDK consumers who build config programmatically.
|
||||
MCPConfig *config.Config
|
||||
|
||||
// InProcessMCPServers registers mcp-go servers that run in the same
|
||||
// process. Each key is the server name (used to prefix tool names, e.g.
|
||||
// "docs__search"). The value must be a *[server.MCPServer].
|
||||
@@ -1136,8 +1144,11 @@ func New(ctx context.Context, opts *Options) (*Kit, error) {
|
||||
}
|
||||
// ---- viperInitMu released — heavy I/O below runs concurrently ----
|
||||
|
||||
// Load MCP configuration. Use pre-loaded config if provided via CLI options.
|
||||
if opts.CLI != nil && opts.CLI.MCPConfig != nil {
|
||||
// Load MCP configuration. Use pre-loaded config if provided directly,
|
||||
// via CLI options, or load from viper as a last resort.
|
||||
if opts.MCPConfig != nil {
|
||||
mcpConfig = opts.MCPConfig
|
||||
} else if opts.CLI != nil && opts.CLI.MCPConfig != nil {
|
||||
mcpConfig = opts.CLI.MCPConfig
|
||||
}
|
||||
if mcpConfig == nil {
|
||||
@@ -1258,6 +1269,7 @@ func New(ctx context.Context, opts *Options) (*Kit, error) {
|
||||
bufferedLogger: agentResult.BufferedLogger,
|
||||
authHandler: setupOpts.AuthHandler,
|
||||
opts: opts,
|
||||
mcpConfig: mcpConfig,
|
||||
hasCustomSystemPrompt: hasCustomSystemPrompt,
|
||||
beforeToolCall: beforeToolCall,
|
||||
afterToolResult: afterToolResult,
|
||||
@@ -1439,8 +1451,9 @@ type TurnResult struct {
|
||||
Response string
|
||||
|
||||
// StopReason indicates why the turn ended. Derived from the LLM
|
||||
// provider's finish reason: "stop", "length" (max tokens), "tool-calls",
|
||||
// "content-filter", "error", "other", "unknown".
|
||||
// provider's finish reason: FinishReasonStop, FinishReasonLength (max
|
||||
// output tokens reached), FinishReasonToolCalls, FinishReasonContentFilter,
|
||||
// FinishReasonError, FinishReasonOther, FinishReasonUnknown.
|
||||
StopReason string
|
||||
|
||||
// SessionID is the UUID of the session this turn belongs to.
|
||||
@@ -1582,13 +1595,15 @@ func (m *Kit) Subagent(ctx context.Context, cfg SubagentConfig) (*SubagentResult
|
||||
tools = SubagentTools()
|
||||
}
|
||||
|
||||
// Create child Kit instance.
|
||||
// Create child Kit instance. Pass the parent's loaded MCP config to
|
||||
// avoid re-reading viper (which races with concurrent subagent spawns).
|
||||
childOpts := &Options{
|
||||
Model: model,
|
||||
SystemPrompt: systemPrompt,
|
||||
Tools: tools,
|
||||
NoSession: cfg.NoSession,
|
||||
Quiet: true,
|
||||
MCPConfig: m.mcpConfig,
|
||||
}
|
||||
child, err := New(ctx, childOpts)
|
||||
if err != nil {
|
||||
@@ -2235,6 +2250,35 @@ func (m *Kit) GetTools() []Tool {
|
||||
return m.agent.GetTools()
|
||||
}
|
||||
|
||||
// MaxTokens returns the effective max output tokens currently configured for
|
||||
// the agent. This is the value actually sent to the LLM provider on each
|
||||
// request, after CLI/env/config resolution, per-model overrides, model-aware
|
||||
// right-sizing, and any Anthropic thinking-budget adjustments.
|
||||
//
|
||||
// Returns 0 when the active provider suppresses the max_output_tokens
|
||||
// parameter (e.g. OpenAI Codex OAuth) or when no model is configured yet.
|
||||
// A non-zero value is the number that will cause a FinishReasonLength
|
||||
// truncation if the model tries to generate beyond it.
|
||||
func (m *Kit) MaxTokens() int {
|
||||
if m.agent == nil {
|
||||
return 0
|
||||
}
|
||||
return m.agent.GetMaxTokens()
|
||||
}
|
||||
|
||||
// MaxOutputLimit returns the catalog-reported output ceiling for the current
|
||||
// model in tokens, or 0 when the model isn't in the registry (custom models,
|
||||
// new releases, Ollama, etc.). Pair with MaxTokens() to detect when the agent
|
||||
// is configured well below what the model supports and surface a hint to the
|
||||
// user.
|
||||
func (m *Kit) MaxOutputLimit() int {
|
||||
info := m.GetModelInfo()
|
||||
if info == nil {
|
||||
return 0
|
||||
}
|
||||
return info.Limit.Output
|
||||
}
|
||||
|
||||
// extractFileParts returns all FilePart entries from a message's Content.
|
||||
// Used to preserve image attachments when replacing user message text.
|
||||
func extractFileParts(msg fantasy.Message) []fantasy.FilePart {
|
||||
|
||||
@@ -93,7 +93,7 @@ api.OnAgentEnd(func(e ext.AgentEndEvent, ctx ext.Context) {
|
||||
// e.Response string
|
||||
// e.StopReason string — "error" (on failure), "completed" (when LLM returns
|
||||
// empty stop reason), or the raw LLM provider value passed through
|
||||
// (e.g. "stop", "end_turn", "max_tokens", "tool_use").
|
||||
// (e.g. "stop", "length" (max output tokens hit), "tool-calls", "content-filter").
|
||||
// To detect errors, check e.StopReason == "error".
|
||||
// Do NOT compare against "completed" for success — instead check != "error".
|
||||
})
|
||||
|
||||
@@ -270,6 +270,27 @@ unsub := host.Subscribe(func(e kit.Event) {
|
||||
| `reasoning_delta` | `ReasoningDeltaEvent` | `Delta` |
|
||||
| `step_usage` | `StepUsageEvent` | `InputTokens`, `OutputTokens`, `CacheReadTokens`, `CacheWriteTokens` |
|
||||
| `steer_consumed` | `SteerConsumedEvent` | `Count` |
|
||||
| `password_prompt` | `PasswordPromptEvent` | `Prompt`, `ResponseCh` |
|
||||
|
||||
**PasswordPromptEvent** (for sudo password handling):
|
||||
```go
|
||||
// PasswordPromptEvent fires when a sudo command needs a password.
|
||||
// The TUI should display a password prompt and send the result back via ResponseCh.
|
||||
type PasswordPromptEvent struct {
|
||||
// Prompt is the message to display to the user.
|
||||
Prompt string
|
||||
// ResponseCh receives the password from the TUI.
|
||||
// The TUI must send exactly one PasswordPromptResponse: Password set (with
// Cancelled false) to submit, or Cancelled true (with Password empty) to cancel.
|
||||
ResponseCh chan<- PasswordPromptResponse
|
||||
}
|
||||
|
||||
// PasswordPromptResponse carries the password prompt result.
|
||||
type PasswordPromptResponse struct {
|
||||
Password string
|
||||
Cancelled bool
|
||||
}
|
||||
```
|
||||
|
||||
### Tool kind constants
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ These flags control Kit's behavior. When a prompt is passed as a positional argu
|
||||
|
||||
| Flag | Short | Default | Description |
|
||||
|------|-------|---------|-------------|
|
||||
| `--max-tokens` | — | `4096` | Maximum tokens in response |
|
||||
| `--max-tokens` | — | `8192` | Base cap for output tokens. Auto-raised per-model up to 32768 when the model's catalog ceiling is higher and no explicit value is set. |
|
||||
| `--temperature` | — | `0.7` | Randomness 0.0–1.0 |
|
||||
| `--top-p` | — | `0.95` | Nucleus sampling 0.0–1.0 |
|
||||
| `--top-k` | — | `40` | Limit top K tokens |
|
||||
|
||||
@@ -18,7 +18,7 @@ Create `~/.kit.yml`:
|
||||
|
||||
```yaml
|
||||
model: anthropic/claude-sonnet-latest
|
||||
max-tokens: 4096
|
||||
max-tokens: 8192
|
||||
temperature: 0.7
|
||||
stream: true
|
||||
```
|
||||
@@ -28,7 +28,7 @@ stream: true
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `model` | string | `anthropic/claude-sonnet-latest` | Model to use (provider/model format) |
|
||||
| `max-tokens` | int | `4096` | Maximum tokens in response |
|
||||
| `max-tokens` | int | `8192` | Base cap for output tokens. Auto-raised per-model up to 32768 when the model's catalog ceiling is higher and no explicit value is set. Use [`modelSettings[provider/model].maxTokens`](#per-model-settings) to override per-model. |
|
||||
| `temperature` | float | `0.7` | Randomness 0.0–1.0 |
|
||||
| `top-p` | float | `0.95` | Nucleus sampling 0.0–1.0 |
|
||||
| `top-k` | int | `40` | Limit top K tokens |
|
||||
|
||||
@@ -37,7 +37,7 @@ internal/acpserver/ - ACP (Agent Client Protocol) server
|
||||
internal/clipboard/ - Cross-platform clipboard operations
|
||||
internal/compaction/ - Conversation compaction and summarization
|
||||
internal/config/ - Configuration management
|
||||
internal/core/ - Built-in tools (bash, read, write, edit, grep, find, ls)
|
||||
internal/core/ - Built-in tools (bash with sudo password prompt, read, write, edit, grep, find, ls)
|
||||
internal/extensions/ - Yaegi extension system
|
||||
internal/kitsetup/ - Initial setup wizard
|
||||
internal/message/ - Message content types and structured content blocks
|
||||
|
||||
+1
-1
@@ -13,7 +13,7 @@ A powerful, extensible AI coding agent CLI with multi-provider support, built-in
|
||||
## Features
|
||||
|
||||
- **Multi-Provider LLM Support** — Anthropic, OpenAI, Google Gemini, Ollama, Azure OpenAI, AWS Bedrock, OpenRouter, and more
|
||||
- **Built-in Core Tools** — bash, read, write, edit, grep, find, ls, subagent with no MCP overhead
|
||||
- **Built-in Core Tools** — bash (with interactive sudo password prompt), read, write, edit, grep, find, ls, subagent with no MCP overhead
|
||||
- **Smart @ Attachments** — Binary files auto-detected via MIME type, MCP resources via `@mcp:server:uri`
|
||||
- **MCP Integration** — Connect external MCP servers for expanded capabilities (tools, prompts, and resources)
|
||||
- **Extension System** — Write custom tools, commands, widgets, and UI modifications in Go
|
||||
|
||||
@@ -100,6 +100,19 @@ kit.HookPriorityLow = 100 // runs last
|
||||
|
||||
Lower values run first. First non-nil result wins.
|
||||
|
||||
## All event types
|
||||
|
||||
| Event | Description |
|
||||
|-------|-------------|
|
||||
| `ToolCallEvent` | Tool call parsed and about to execute |
|
||||
| `ToolResultEvent` | Tool execution completed with result |
|
||||
| `ToolOutputEvent` | Streaming output chunk from tool (e.g., bash stdout/stderr) |
|
||||
| `MessageUpdateEvent` | Streaming text chunk from LLM |
|
||||
| `ResponseEvent` | Final response received |
|
||||
| `TurnStartEvent` | Agent turn started |
|
||||
| `TurnEndEvent` | Agent turn completed |
|
||||
| `PasswordPromptEvent` | Sudo command needs password (respond via `ResponseCh`) |
|
||||
|
||||
## Subagent event monitoring
|
||||
|
||||
Monitor real-time events from LLM-initiated subagents (when the model uses the `subagent` tool):
|
||||
|
||||
Reference in New Issue
Block a user