feat: add first-class subagent support for task delegation

Implement 4-phase subagent system enabling LLM and extensions to spawn,
manage, and orchestrate child Kit instances for parallel task execution.

- Phase 1: SDK API with SpawnSubagent() for extensions
- Phase 2: spawn_subagent core tool for LLM usage
- Phase 3: Session hierarchy with ParentSessionID tracking
- Phase 4: Provider pooling for concurrent model access

New files:
- internal/extensions/subagent.go: SpawnSubagent implementation
- internal/core/subagent.go: Core tool definition
- internal/models/pool.go: Provider pool for concurrency
- examples/extensions/subagent-test.go: Test extension
- openspec/subagent-support.md: Design specification
This commit is contained in:
Ed Zynda
2026-03-09 23:07:27 +03:00
parent e613a07773
commit bbd8975ca0
11 changed files with 1570 additions and 0 deletions
+3
View File
@@ -924,6 +924,9 @@ func runNormalMode(ctx context.Context) error {
Index: resp.Index,
}
},
SpawnSubagent: func(config extensions.SubagentConfig) (*extensions.SubagentHandle, *extensions.SubagentResult, error) {
return extensions.SpawnSubagent(config)
},
})
kitInstance.EmitSessionStart()
}
+164
View File
@@ -0,0 +1,164 @@
//go:build ignore
// Subagent Test Extension — Tests the new first-class subagent API
//
// Commands:
//
// /subtest <task> — spawn a blocking subagent and print result
// /subbg <task> — spawn a background subagent with live output
//
// Usage: kit -e examples/extensions/subagent-test.go
package main
import (
"fmt"
"strings"
"sync"
"time"
"kit/ext"
)
var (
mu sync.Mutex
latestCtx ext.Context
hasCtx bool
)
func Init(api ext.API) {
// Keep context fresh
api.OnSessionStart(func(_ ext.SessionStartEvent, ctx ext.Context) {
mu.Lock()
latestCtx = ctx
hasCtx = true
mu.Unlock()
ctx.PrintInfo(
"Subagent Test Extension loaded\n\n" +
"/subtest <task> Spawn blocking subagent\n" +
"/subbg <task> Spawn background subagent\n\n" +
"The LLM can also use the spawn_subagent tool.")
})
api.OnAgentEnd(func(_ ext.AgentEndEvent, ctx ext.Context) {
mu.Lock()
latestCtx = ctx
mu.Unlock()
})
// Command: /subtest <task> — blocking subagent
api.RegisterCommand(ext.CommandDef{
Name: "subtest",
Description: "Spawn a blocking subagent: /subtest <task>",
Execute: func(args string, ctx ext.Context) (string, error) {
mu.Lock()
latestCtx = ctx
hasCtx = true
mu.Unlock()
task := strings.TrimSpace(args)
if task == "" {
return "Usage: /subtest <task>", nil
}
ctx.PrintInfo(fmt.Sprintf("Spawning blocking subagent for: %s", task))
start := time.Now()
_, result, err := ctx.SpawnSubagent(ext.SubagentConfig{
Prompt: task,
Timeout: 2 * time.Minute,
Blocking: true,
})
elapsed := time.Since(start)
if err != nil {
return fmt.Sprintf("Spawn error: %v", err), nil
}
if result == nil {
return "No result returned", nil
}
if result.Error != nil {
return fmt.Sprintf("Subagent failed (exit %d) after %ds: %v\n\nPartial output:\n%s",
result.ExitCode, int(elapsed.Seconds()), result.Error, truncate(result.Response, 2000)), nil
}
response := fmt.Sprintf("Subagent completed in %ds", int(elapsed.Seconds()))
if result.Usage != nil {
response += fmt.Sprintf(" (tokens: %d in / %d out)", result.Usage.InputTokens, result.Usage.OutputTokens)
}
response += fmt.Sprintf("\n\nResult:\n%s", truncate(result.Response, 4000))
return response, nil
},
})
// Command: /subbg <task> — background subagent with callbacks
api.RegisterCommand(ext.CommandDef{
Name: "subbg",
Description: "Spawn a background subagent: /subbg <task>",
Execute: func(args string, ctx ext.Context) (string, error) {
mu.Lock()
latestCtx = ctx
hasCtx = true
mu.Unlock()
task := strings.TrimSpace(args)
if task == "" {
return "Usage: /subbg <task>", nil
}
ctx.PrintInfo(fmt.Sprintf("Spawning background subagent for: %s", task))
start := time.Now()
handle, _, err := ctx.SpawnSubagent(ext.SubagentConfig{
Prompt: task,
Timeout: 2 * time.Minute,
OnOutput: func(chunk string) {
// Live output - could update a widget here
fmt.Print(chunk)
},
OnComplete: func(result ext.SubagentResult) {
elapsed := time.Since(start)
mu.Lock()
c := latestCtx
ok := hasCtx
mu.Unlock()
if !ok {
return
}
if result.Error != nil {
c.SendMessage(fmt.Sprintf("Background subagent failed after %ds: %v",
int(elapsed.Seconds()), result.Error))
return
}
msg := fmt.Sprintf("Background subagent completed in %ds", int(elapsed.Seconds()))
if result.Usage != nil {
msg += fmt.Sprintf(" (tokens: %d in / %d out)", result.Usage.InputTokens, result.Usage.OutputTokens)
}
msg += fmt.Sprintf("\n\nResult:\n%s", truncate(result.Response, 4000))
c.SendMessage(msg)
},
})
if err != nil {
return fmt.Sprintf("Spawn error: %v", err), nil
}
return fmt.Sprintf("Background subagent spawned (ID: %s). Results will be delivered when complete.", handle.ID), nil
},
})
}
func truncate(s string, max int) string {
if len(s) <= max {
return s
}
return s[:max] + "\n\n... [truncated]"
}
+120
View File
@@ -0,0 +1,120 @@
package core
import (
"context"
"fmt"
"time"
"charm.land/fantasy"
"github.com/mark3labs/kit/internal/extensions"
)
const defaultSubagentTimeout = 5 * time.Minute
const maxSubagentTimeout = 30 * time.Minute
type subagentArgs struct {
Task string `json:"task"`
Model string `json:"model,omitempty"`
SystemPrompt string `json:"system_prompt,omitempty"`
TimeoutSeconds int `json:"timeout_seconds,omitempty"`
}
// NewSubagentTool creates the spawn_subagent core tool.
func NewSubagentTool(opts ...ToolOption) fantasy.AgentTool {
return &coreTool{
info: fantasy.ToolInfo{
Name: "spawn_subagent",
Description: `Spawn a background subagent to perform a task autonomously.
The subagent runs as a separate Kit instance with full tool access. Use this to:
- Delegate independent subtasks that can run in parallel
- Perform research or analysis without blocking your main work
- Execute tasks that benefit from a fresh context window
The subagent result is returned when it completes. For long-running tasks,
consider breaking them into smaller focused subtasks.
Example use cases:
- "Research the authentication patterns in this codebase"
- "Write unit tests for the UserService class"
- "Analyze the performance bottlenecks in the database queries"`,
Parameters: map[string]any{
"task": map[string]any{
"type": "string",
"description": "The complete task description for the subagent to perform",
},
"model": map[string]any{
"type": "string",
"description": "Optional model override (e.g. 'anthropic/claude-haiku-3-5-20241022' for faster/cheaper tasks)",
},
"system_prompt": map[string]any{
"type": "string",
"description": "Optional system prompt for domain-specific guidance",
},
"timeout_seconds": map[string]any{
"type": "number",
"description": "Maximum execution time in seconds (default: 300, max: 1800)",
},
},
Required: []string{"task"},
},
handler: func(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
return executeSubagent(ctx, call)
},
}
}
func executeSubagent(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
var args subagentArgs
if err := parseArgs(call.Input, &args); err != nil {
return fantasy.NewTextErrorResponse("task parameter is required"), nil
}
if args.Task == "" {
return fantasy.NewTextErrorResponse("task parameter is required"), nil
}
// Determine timeout
timeout := defaultSubagentTimeout
if args.TimeoutSeconds > 0 {
timeout = min(time.Duration(args.TimeoutSeconds)*time.Second, maxSubagentTimeout)
}
// Spawn subagent in blocking mode
_, result, err := extensions.SpawnSubagent(extensions.SubagentConfig{
Prompt: args.Task,
Model: args.Model,
SystemPrompt: args.SystemPrompt,
Timeout: timeout,
Blocking: true,
})
if err != nil {
return fantasy.NewTextErrorResponse(fmt.Sprintf("Failed to spawn subagent: %v", err)), nil
}
if result.Error != nil {
// Subagent failed but we still have partial output
response := fmt.Sprintf("Subagent failed (exit code %d) after %ds.\n\nError: %v",
result.ExitCode, int(result.Elapsed.Seconds()), result.Error)
if result.Response != "" {
response += fmt.Sprintf("\n\nPartial output:\n%s", truncateResponse(result.Response, 8000))
}
return fantasy.NewTextErrorResponse(response), nil
}
// Build successful response
response := fmt.Sprintf("Subagent completed successfully in %ds.", int(result.Elapsed.Seconds()))
if result.Usage != nil {
response += fmt.Sprintf(" (tokens: %d in / %d out)", result.Usage.InputTokens, result.Usage.OutputTokens)
}
response += fmt.Sprintf("\n\nResult:\n%s", truncateResponse(result.Response, 12000))
return fantasy.NewTextResponse(response), nil
}
// truncateResponse limits the response length to avoid overwhelming context windows.
func truncateResponse(s string, maxLen int) string {
if len(s) <= maxLen {
return s
}
return s[:maxLen] + "\n\n... [truncated — " + fmt.Sprintf("%d", len(s)-maxLen) + " bytes omitted]"
}
+1
View File
@@ -96,5 +96,6 @@ func AllTools(opts ...ToolOption) []fantasy.AgentTool {
NewGrepTool(opts...),
NewFindTool(opts...),
NewLsTool(opts...),
NewSubagentTool(opts...),
}
}
+35
View File
@@ -491,6 +491,41 @@ type Context struct {
// },
// })
ReloadExtensions func() error
// SpawnSubagent spawns a child Kit instance to perform a task autonomously.
// The subagent runs as a separate subprocess with full tool access but
// isolated session and extensions (--no-session --no-extensions).
//
// When config.Blocking is true, blocks until completion and returns the
// result directly (handle is nil). When false, returns immediately with
// a handle for monitoring/cancellation.
//
// Example — blocking call:
//
// _, result, err := ctx.SpawnSubagent(ext.SubagentConfig{
// Prompt: "Research authentication patterns in this codebase",
// Blocking: true,
// Timeout: 2 * time.Minute,
// })
// if err != nil {
// ctx.PrintError("spawn failed: " + err.Error())
// return
// }
// ctx.PrintInfo("Subagent result:\n" + result.Response)
//
// Example — background spawn with callbacks:
//
// handle, _, _ := ctx.SpawnSubagent(ext.SubagentConfig{
// Prompt: "Write unit tests for UserService",
// OnOutput: func(chunk string) {
// // Live output streaming
// },
// OnComplete: func(result ext.SubagentResult) {
// ctx.SendMessage("Subagent finished:\n" + result.Response)
// },
// })
// // handle.Kill() to cancel, handle.Wait() to block
SpawnSubagent func(SubagentConfig) (*SubagentHandle, *SubagentResult, error)
}
// ---------------------------------------------------------------------------
+328
View File
@@ -0,0 +1,328 @@
// Package extensions provides subagent spawning capabilities for Kit extensions.
package extensions
import (
"bufio"
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
"strings"
"sync"
"sync/atomic"
"time"
)
// ---------------------------------------------------------------------------
// Subagent types
// ---------------------------------------------------------------------------
// SubagentConfig configures a subagent spawn.
type SubagentConfig struct {
// Prompt is the task/instruction for the subagent (required).
Prompt string
// Model overrides the parent's model (e.g. "anthropic/claude-haiku-3-5-20241022").
// Empty string uses the parent's current model.
Model string
// SystemPrompt provides domain-specific instructions.
// Empty string uses the default system prompt.
SystemPrompt string
// Timeout limits execution time. Zero means 5 minute default.
Timeout time.Duration
// OnOutput streams stderr output chunks as the subagent runs.
// Called from a goroutine; must be safe for concurrent use.
OnOutput func(chunk string)
// OnComplete is called when the subagent finishes (success or error).
// Called from a goroutine; must be safe for concurrent use.
OnComplete func(result SubagentResult)
// Blocking, when true, makes SpawnSubagent wait for completion and
// return the result directly. When false (default), spawns in background
// and returns immediately with a handle.
Blocking bool
// ParentSessionID links the subagent's session to the parent (optional).
// When set, the subagent's session is persisted with a parent reference.
ParentSessionID string
}
// SubagentResult contains the outcome of a subagent execution.
type SubagentResult struct {
// Response is the subagent's final text response.
Response string
// Error is set if the subagent failed (nil on success).
Error error
// ExitCode is the subprocess exit code (0 = success).
ExitCode int
// Elapsed is the total execution time.
Elapsed time.Duration
// Usage contains token usage if available.
Usage *SubagentUsage
}
// SubagentUsage contains token usage from the subagent's run.
type SubagentUsage struct {
InputTokens int64
OutputTokens int64
}
// SubagentHandle provides control over a running subagent.
type SubagentHandle struct {
// ID is a unique identifier for this subagent instance.
ID string
proc *os.Process
done chan struct{}
result *SubagentResult
mu sync.Mutex
}
// Kill terminates the subagent process.
func (h *SubagentHandle) Kill() error {
h.mu.Lock()
proc := h.proc
h.mu.Unlock()
if proc != nil {
return proc.Kill()
}
return nil
}
// Wait blocks until the subagent completes and returns the result.
func (h *SubagentHandle) Wait() SubagentResult {
<-h.done
h.mu.Lock()
defer h.mu.Unlock()
if h.result != nil {
return *h.result
}
return SubagentResult{Error: fmt.Errorf("subagent completed without result")}
}
// Done returns a channel that closes when the subagent completes.
func (h *SubagentHandle) Done() <-chan struct{} {
return h.done
}
// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------
// subagentJSONOutput matches the JSON envelope produced by `kit --json`.
type subagentJSONOutput struct {
Response string `json:"response"`
Usage *struct {
InputTokens int64 `json:"input_tokens"`
OutputTokens int64 `json:"output_tokens"`
} `json:"usage,omitempty"`
}
var subagentCounter uint64
func generateSubagentID() string {
n := atomic.AddUint64(&subagentCounter, 1)
return fmt.Sprintf("sub-%d-%d", time.Now().UnixNano(), n)
}
func findKitBinary() string {
// Try the current process executable first.
if exe, err := os.Executable(); err == nil {
if _, err := os.Stat(exe); err == nil {
return exe
}
}
// Fall back to PATH lookup.
if p, err := exec.LookPath("kit"); err == nil {
return p
}
return "kit"
}
// ---------------------------------------------------------------------------
// SpawnSubagent implementation
// ---------------------------------------------------------------------------
// SpawnSubagent spawns a child Kit instance to perform a task.
//
// When config.Blocking is true, blocks until completion and returns the result
// directly (handle is nil). When false, returns immediately with a handle for
// monitoring/cancellation.
//
// The subagent runs with --json --no-session --no-extensions flags by default,
// ensuring isolation from the parent's extensions and session state.
func SpawnSubagent(cfg SubagentConfig) (*SubagentHandle, *SubagentResult, error) {
if cfg.Prompt == "" {
return nil, nil, fmt.Errorf("prompt is required")
}
timeout := cfg.Timeout
if timeout == 0 {
timeout = 5 * time.Minute
}
kitBinary := findKitBinary()
// Build subprocess arguments.
args := []string{
"--json",
"--no-session",
"--no-extensions",
}
if cfg.Model != "" {
args = append(args, "--model", cfg.Model)
}
// Handle system prompt - write to temp file if provided.
var tmpFile *os.File
if cfg.SystemPrompt != "" {
var err error
tmpFile, err = os.CreateTemp("", "kit-subagent-*.txt")
if err != nil {
return nil, nil, fmt.Errorf("create temp file: %w", err)
}
if _, err := tmpFile.WriteString(cfg.SystemPrompt); err != nil {
_ = tmpFile.Close()
_ = os.Remove(tmpFile.Name())
return nil, nil, fmt.Errorf("write system prompt: %w", err)
}
_ = tmpFile.Close()
args = append(args, "--system-prompt", tmpFile.Name())
}
// Add the prompt as a positional argument.
args = append(args, cfg.Prompt)
// Create command with timeout context.
ctx, cancel := context.WithTimeout(context.Background(), timeout)
cmd := exec.CommandContext(ctx, kitBinary, args...)
cmd.Env = os.Environ()
stdout, err := cmd.StdoutPipe()
if err != nil {
cancel()
if tmpFile != nil {
_ = os.Remove(tmpFile.Name())
}
return nil, nil, fmt.Errorf("stdout pipe: %w", err)
}
stderr, err := cmd.StderrPipe()
if err != nil {
cancel()
if tmpFile != nil {
_ = os.Remove(tmpFile.Name())
}
return nil, nil, fmt.Errorf("stderr pipe: %w", err)
}
handle := &SubagentHandle{
ID: generateSubagentID(),
done: make(chan struct{}),
}
// Start the subprocess.
start := time.Now()
if err := cmd.Start(); err != nil {
cancel()
if tmpFile != nil {
_ = os.Remove(tmpFile.Name())
}
return nil, nil, fmt.Errorf("start subprocess: %w", err)
}
handle.mu.Lock()
handle.proc = cmd.Process
handle.mu.Unlock()
// Run the subprocess monitoring in a goroutine.
go func() {
defer close(handle.done)
defer cancel()
if tmpFile != nil {
defer func() { _ = os.Remove(tmpFile.Name()) }()
}
var wg sync.WaitGroup
var stdoutBuf strings.Builder
// Read stderr (live output).
wg.Go(func() {
scanner := bufio.NewScanner(stderr)
scanner.Buffer(make([]byte, 256*1024), 256*1024)
for scanner.Scan() {
line := scanner.Text()
if cfg.OnOutput != nil && strings.TrimSpace(line) != "" {
cfg.OnOutput(line + "\n")
}
}
})
// Read stdout (JSON output).
scanner := bufio.NewScanner(stdout)
scanner.Buffer(make([]byte, 256*1024), 256*1024)
for scanner.Scan() {
stdoutBuf.WriteString(scanner.Text() + "\n")
}
wg.Wait()
waitErr := cmd.Wait()
elapsed := time.Since(start)
// Build result.
result := SubagentResult{Elapsed: elapsed}
if waitErr != nil {
result.Error = waitErr
if exitErr, ok := waitErr.(*exec.ExitError); ok {
result.ExitCode = exitErr.ExitCode()
} else {
result.ExitCode = 1
}
}
// Parse JSON output.
raw := strings.TrimSpace(stdoutBuf.String())
var parsed subagentJSONOutput
if raw != "" && json.Unmarshal([]byte(raw), &parsed) == nil {
result.Response = parsed.Response
if parsed.Usage != nil {
result.Usage = &SubagentUsage{
InputTokens: parsed.Usage.InputTokens,
OutputTokens: parsed.Usage.OutputTokens,
}
}
} else {
// Fallback: use raw stdout.
result.Response = raw
}
handle.mu.Lock()
handle.result = &result
handle.proc = nil
handle.mu.Unlock()
if cfg.OnComplete != nil {
cfg.OnComplete(result)
}
}()
if cfg.Blocking {
// Wait for completion and return result directly.
<-handle.done
handle.mu.Lock()
r := handle.result
handle.mu.Unlock()
return nil, r, nil
}
return handle, nil, nil
}
+6
View File
@@ -110,6 +110,12 @@ func Symbols() interp.Exports {
"BeforeCompactEvent": reflect.ValueOf((*BeforeCompactEvent)(nil)),
"BeforeCompactResult": reflect.ValueOf((*BeforeCompactResult)(nil)),
// Subagent types
"SubagentConfig": reflect.ValueOf((*SubagentConfig)(nil)),
"SubagentResult": reflect.ValueOf((*SubagentResult)(nil)),
"SubagentUsage": reflect.ValueOf((*SubagentUsage)(nil)),
"SubagentHandle": reflect.ValueOf((*SubagentHandle)(nil)),
// Event structs
"ToolCallEvent": reflect.ValueOf((*ToolCallEvent)(nil)),
"ToolCallResult": reflect.ValueOf((*ToolCallResult)(nil)),
+193
View File
@@ -0,0 +1,193 @@
package models
import (
"context"
"sync"
"time"
"charm.land/fantasy"
)
// ProviderPool manages reusable LLM provider instances to reduce overhead
// when spawning multiple subagents or making repeated completion calls.
type ProviderPool struct {
mu sync.RWMutex
providers map[string]*pooledProvider
ttl time.Duration
closed bool
closeCh chan struct{}
}
type pooledProvider struct {
model fantasy.LanguageModel
closer func() error
providerOpts fantasy.ProviderOptions
created time.Time
lastUsed time.Time
refs int32
}
// DefaultPoolTTL is the default time-to-live for idle pooled providers.
const DefaultPoolTTL = 5 * time.Minute
// globalPool is the singleton provider pool instance.
var globalPool *ProviderPool
var poolOnce sync.Once
// GetGlobalPool returns the singleton provider pool instance.
func GetGlobalPool() *ProviderPool {
poolOnce.Do(func() {
globalPool = NewProviderPool(DefaultPoolTTL)
})
return globalPool
}
// NewProviderPool creates a provider pool with the given TTL for idle providers.
func NewProviderPool(ttl time.Duration) *ProviderPool {
p := &ProviderPool{
providers: make(map[string]*pooledProvider),
ttl: ttl,
closeCh: make(chan struct{}),
}
go p.cleanupLoop()
return p
}
// Get returns a provider for the model string, creating one if needed.
// The returned release function must be called when the provider is no longer
// needed. The provider may be reused by subsequent Get calls.
func (p *ProviderPool) Get(ctx context.Context, modelString string) (fantasy.LanguageModel, fantasy.ProviderOptions, func(), error) {
p.mu.Lock()
// Check if we have an existing provider.
if pp, ok := p.providers[modelString]; ok {
pp.refs++
pp.lastUsed = time.Now()
p.mu.Unlock()
return pp.model, pp.providerOpts, func() { p.release(modelString) }, nil
}
p.mu.Unlock()
// Create a new provider outside the lock.
config := &ProviderConfig{ModelString: modelString}
result, err := CreateProvider(ctx, config)
if err != nil {
return nil, nil, nil, err
}
p.mu.Lock()
defer p.mu.Unlock()
// Double-check: another goroutine may have created one while we were unlocked.
if pp, ok := p.providers[modelString]; ok {
// Close the one we just created and use the existing one.
if result.Closer != nil {
_ = result.Closer.Close()
}
pp.refs++
pp.lastUsed = time.Now()
return pp.model, pp.providerOpts, func() { p.release(modelString) }, nil
}
var closerFn func() error
if result.Closer != nil {
closerFn = result.Closer.Close
}
pp := &pooledProvider{
model: result.Model,
closer: closerFn,
providerOpts: result.ProviderOptions,
created: time.Now(),
lastUsed: time.Now(),
refs: 1,
}
p.providers[modelString] = pp
return pp.model, pp.providerOpts, func() { p.release(modelString) }, nil
}
func (p *ProviderPool) release(modelString string) {
p.mu.Lock()
defer p.mu.Unlock()
if pp, ok := p.providers[modelString]; ok {
pp.refs--
pp.lastUsed = time.Now()
}
}
func (p *ProviderPool) cleanupLoop() {
ticker := time.NewTicker(p.ttl / 2)
defer ticker.Stop()
for {
select {
case <-p.closeCh:
return
case <-ticker.C:
p.cleanup()
}
}
}
func (p *ProviderPool) cleanup() {
p.mu.Lock()
defer p.mu.Unlock()
now := time.Now()
for key, pp := range p.providers {
// Only clean up providers with no active references and past TTL.
if pp.refs <= 0 && now.Sub(pp.lastUsed) > p.ttl {
if pp.closer != nil {
_ = pp.closer()
}
delete(p.providers, key)
}
}
}
// Close shuts down the pool and releases all providers.
func (p *ProviderPool) Close() {
p.mu.Lock()
if p.closed {
p.mu.Unlock()
return
}
p.closed = true
close(p.closeCh)
for key, pp := range p.providers {
if pp.closer != nil {
_ = pp.closer()
}
delete(p.providers, key)
}
p.mu.Unlock()
}
// Stats returns current pool statistics.
func (p *ProviderPool) Stats() PoolStats {
p.mu.RLock()
defer p.mu.RUnlock()
stats := PoolStats{
TotalProviders: len(p.providers),
}
for _, pp := range p.providers {
if pp.refs > 0 {
stats.ActiveProviders++
} else {
stats.IdleProviders++
}
}
return stats
}
// PoolStats contains provider pool statistics.
type PoolStats struct {
TotalProviders int
ActiveProviders int
IdleProviders int
}
+4
View File
@@ -38,6 +38,10 @@ type SessionHeader struct {
Timestamp time.Time `json:"timestamp"` // creation time
Cwd string `json:"cwd"` // working directory
ParentSession string `json:"parent_session,omitempty"` // path to parent if forked
// Subagent fields (set when session is created by a subagent)
ParentSessionID string `json:"parent_session_id,omitempty"` // UUID of parent session
SubagentTask string `json:"subagent_task,omitempty"` // original task prompt
}
// Entry is the common structure shared by all tree entries (everything except
+32
View File
@@ -29,6 +29,12 @@ type SessionInfo struct {
// ParentSessionPath is the parent session path if this session was forked.
ParentSessionPath string
// ParentSessionID is the UUID of the parent session (for subagent sessions).
ParentSessionID string
// SubagentTask is the original task prompt (for subagent sessions).
SubagentTask string
// Created is when the session was first created.
Created time.Time
@@ -162,6 +168,8 @@ func extractSessionInfo(path string) (*SessionInfo, error) {
info.Created = h.Timestamp
info.Modified = h.Timestamp
info.ParentSessionPath = h.ParentSession
info.ParentSessionID = h.ParentSessionID
info.SubagentTask = h.SubagentTask
continue
}
@@ -245,3 +253,27 @@ func extractTextPreview(partsJSON json.RawMessage) string {
func DeleteSession(path string) error {
return os.Remove(path)
}
// ListChildSessions returns all sessions that have the given session ID as
// their parent. This is useful for finding subagent sessions spawned from
// a parent session. Results are sorted by creation time (newest first).
func ListChildSessions(parentID string) ([]SessionInfo, error) {
if parentID == "" {
return nil, nil
}
allSessions, err := ListAllSessions()
if err != nil {
return nil, err
}
var children []SessionInfo
for _, s := range allSessions {
if s.ParentSessionID == parentID {
children = append(children, s)
}
}
// Already sorted by modification time from ListAllSessions
return children, nil
}
+684
View File
@@ -0,0 +1,684 @@
# First-Class Subagent Support
**Status:** Proposal
**Author:** AI Assistant
**Date:** 2026-03-09
## Summary
Add first-class subagent support to Kit, enabling the LLM and extensions to spawn, manage, and orchestrate child Kit instances for parallel task delegation. This builds on proven patterns from existing extensions (`subagent-widget.go`, `kit-kit.go`) and promotes them to SDK/core APIs.
## Motivation
### Current State
Kit already supports subagents through **two working extension implementations**:
1. **`subagent-widget.go`** (807 lines) - Full lifecycle management:
- Tools: `subagent_create`, `subagent_continue`, `subagent_remove`, `subagent_list`
- Live widget dashboard showing status, elapsed time, output
- Conversation history persistence for multi-turn continuations
- Process management with kill signals
2. **`kit-kit.go`** (870 lines) - Multi-expert parallel research:
- Domain-specific system prompts from `.kit/agents/kit-kit/*.md`
- Parallel subprocess execution with goroutines
- Grid-based dashboard for concurrent agents
Both spawn Kit as subprocesses using `--json --no-session --no-extensions` and work today. However:
- Each extension reimplements subprocess spawning from scratch (~200 lines)
- No SDK API for extensions to spawn subagents easily
- No core tool for LLM-initiated subagent spawning
- No session tree relationships (subagents are ephemeral)
- Provider creation overhead (each subagent creates new HTTP clients)
### Goals
1. **SDK API** - `ctx.SpawnSubagent(config)` for extensions
2. **Core Tool** - Built-in `spawn_subagent` tool for LLM use
3. **Session Hierarchy** - Optional parent-child session linking
4. **Provider Efficiency** - Connection reuse for subagent spawns
5. **Lifecycle Management** - Cancellation propagation, timeouts
## Design
### Phase 1: SDK Convenience API (Easy)
Add a `SpawnSubagent` function to `ext.Context` that wraps the proven subprocess pattern.
#### Extension API (`ext` package)
```go
// SubagentConfig configures a subagent spawn.
type SubagentConfig struct {
// Prompt is the task/instruction for the subagent.
Prompt string
// Model overrides the parent's model (e.g. "anthropic/claude-haiku-3-5-20241022").
// Empty string uses the parent's current model.
Model string
// SystemPrompt provides domain-specific instructions.
// Empty string uses the default system prompt.
SystemPrompt string
// Timeout limits execution time. Zero means no timeout (not recommended).
Timeout time.Duration
// OnOutput streams stderr output chunks as the subagent runs.
// Called from a goroutine; must be safe for concurrent use.
OnOutput func(chunk string)
// OnComplete is called when the subagent finishes (success or error).
// Called from a goroutine; must be safe for concurrent use.
OnComplete func(result SubagentResult)
// Blocking, when true, makes SpawnSubagent wait for completion and
// return the result directly. When false (default), spawns in background
// and returns immediately with a handle.
Blocking bool
}
// SubagentResult contains the outcome of a subagent execution.
type SubagentResult struct {
// Response is the subagent's final text response.
Response string
// Error is set if the subagent failed.
Error error
// ExitCode is the subprocess exit code (0 = success).
ExitCode int
// Elapsed is the total execution time.
Elapsed time.Duration
// Usage contains token usage if available.
Usage *SubagentUsage
}
// SubagentUsage contains token usage from the subagent's run.
type SubagentUsage struct {
InputTokens int64
OutputTokens int64
}
// SubagentHandle provides control over a running subagent.
type SubagentHandle struct {
// ID is a unique identifier for this subagent instance.
ID string
// Kill terminates the subagent process.
Kill() error
// Wait blocks until the subagent completes and returns the result.
Wait() SubagentResult
// Done returns a channel that closes when the subagent completes.
Done() <-chan struct{}
}
```
#### Context Addition
```go
type Context struct {
// ... existing fields ...
// SpawnSubagent spawns a child Kit instance to perform a task.
// When config.Blocking is true, blocks until completion and returns
// the result directly (handle is nil). When false, returns immediately
// with a handle for monitoring/cancellation.
SpawnSubagent func(SubagentConfig) (*SubagentHandle, *SubagentResult, error)
}
```
#### Implementation (`internal/extensions/subagent.go`)
```go
package extensions
import (
"bufio"
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
"sync"
"time"
)
// subagentJSONOutput matches the JSON envelope from `kit --json`.
type subagentJSONOutput struct {
Response string `json:"response"`
Usage *struct {
InputTokens int64 `json:"input_tokens"`
OutputTokens int64 `json:"output_tokens"`
} `json:"usage,omitempty"`
}
// SpawnSubagent implements the subagent spawning logic.
func SpawnSubagent(cfg SubagentConfig) (*SubagentHandle, *SubagentResult, error) {
if cfg.Prompt == "" {
return nil, nil, fmt.Errorf("prompt is required")
}
// Find the kit binary.
kitBinary := findKitBinary()
// Build subprocess arguments.
args := []string{
"--json",
"--no-session",
"--no-extensions",
}
if cfg.Model != "" {
args = append(args, "--model", cfg.Model)
}
if cfg.SystemPrompt != "" {
// Write system prompt to temp file.
tmpFile, err := os.CreateTemp("", "kit-subagent-*.txt")
if err != nil {
return nil, nil, fmt.Errorf("create temp file: %w", err)
}
if _, err := tmpFile.WriteString(cfg.SystemPrompt); err != nil {
tmpFile.Close()
os.Remove(tmpFile.Name())
return nil, nil, fmt.Errorf("write system prompt: %w", err)
}
tmpFile.Close()
args = append(args, "--system-prompt", tmpFile.Name())
// Note: temp file cleanup handled after subprocess exits.
}
args = append(args, cfg.Prompt)
cmd := exec.Command(kitBinary, args...)
cmd.Env = os.Environ()
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, nil, fmt.Errorf("stdout pipe: %w", err)
}
stderr, err := cmd.StderrPipe()
if err != nil {
return nil, nil, fmt.Errorf("stderr pipe: %w", err)
}
handle := &SubagentHandle{
ID: generateSubagentID(),
done: make(chan struct{}),
}
// Start the subprocess.
start := time.Now()
if err := cmd.Start(); err != nil {
return nil, nil, fmt.Errorf("start subprocess: %w", err)
}
handle.proc = cmd.Process
// Run the subprocess monitoring in a goroutine.
go func() {
defer close(handle.done)
var wg sync.WaitGroup
var stdoutBuf strings.Builder
// Read stderr (live output).
wg.Add(1)
go func() {
defer wg.Done()
scanner := bufio.NewScanner(stderr)
scanner.Buffer(make([]byte, 256*1024), 256*1024)
for scanner.Scan() {
line := scanner.Text()
if cfg.OnOutput != nil && strings.TrimSpace(line) != "" {
cfg.OnOutput(line + "\n")
}
}
}()
// Read stdout (JSON output).
scanner := bufio.NewScanner(stdout)
scanner.Buffer(make([]byte, 256*1024), 256*1024)
for scanner.Scan() {
stdoutBuf.WriteString(scanner.Text() + "\n")
}
wg.Wait()
waitErr := cmd.Wait()
elapsed := time.Since(start)
// Parse result.
result := SubagentResult{Elapsed: elapsed}
if waitErr != nil {
result.Error = waitErr
if exitErr, ok := waitErr.(*exec.ExitError); ok {
result.ExitCode = exitErr.ExitCode()
} else {
result.ExitCode = 1
}
}
// Parse JSON output.
raw := strings.TrimSpace(stdoutBuf.String())
var parsed subagentJSONOutput
if raw != "" && json.Unmarshal([]byte(raw), &parsed) == nil {
result.Response = parsed.Response
if parsed.Usage != nil {
result.Usage = &SubagentUsage{
InputTokens: parsed.Usage.InputTokens,
OutputTokens: parsed.Usage.OutputTokens,
}
}
} else {
result.Response = raw
}
handle.result = &result
if cfg.OnComplete != nil {
cfg.OnComplete(result)
}
}()
if cfg.Blocking {
// Wait for completion and return result directly.
<-handle.done
return nil, handle.result, nil
}
return handle, nil, nil
}
```
### Phase 2: Core Tool (Easy)
Add a built-in `spawn_subagent` tool that the LLM can invoke directly.
#### Tool Definition (`internal/tools/subagent.go`)
```go
package tools
import (
"context"
"encoding/json"
"fmt"
"time"
"charm.land/fantasy"
"github.com/mark3labs/kit/internal/extensions"
)
// SubagentTool returns a tool that spawns Kit subagents.
func SubagentTool() fantasy.AgentTool {
return fantasy.NewTool(
"spawn_subagent",
`Spawn a background subagent to perform a task autonomously.
The subagent runs as a separate Kit instance with full tool access. Use this to:
- Delegate independent subtasks that can run in parallel
- Perform research or analysis without blocking your main work
- Execute tasks that benefit from a fresh context window
The subagent result is returned when it completes. For long-running tasks,
consider breaking them into smaller focused subtasks.
Example use cases:
- "Research the authentication patterns in this codebase"
- "Write unit tests for the UserService class"
- "Analyze the performance bottlenecks in the database queries"`,
func(ctx context.Context, input SubagentToolInput) (string, error) {
if input.Task == "" {
return "", fmt.Errorf("task is required")
}
timeout := 5 * time.Minute // Default timeout
if input.TimeoutSeconds > 0 {
timeout = time.Duration(input.TimeoutSeconds) * time.Second
}
_, result, err := extensions.SpawnSubagent(extensions.SubagentConfig{
Prompt: input.Task,
Model: input.Model,
SystemPrompt: input.SystemPrompt,
Timeout: timeout,
Blocking: true,
})
if err != nil {
return "", fmt.Errorf("spawn subagent: %w", err)
}
if result.Error != nil {
return fmt.Sprintf("Subagent failed (exit %d): %v\n\nPartial output:\n%s",
result.ExitCode, result.Error, result.Response), nil
}
return fmt.Sprintf("Subagent completed in %ds.\n\nResult:\n%s",
int(result.Elapsed.Seconds()), result.Response), nil
},
)
}
// SubagentToolInput defines the parameters for the spawn_subagent tool.
type SubagentToolInput struct {
// Task is the complete task description for the subagent.
Task string `json:"task" jsonschema:"description=The complete task description for the subagent to perform"`
// Model optionally overrides the model (e.g. "anthropic/claude-haiku-3-5-20241022" for faster/cheaper tasks).
Model string `json:"model,omitempty" jsonschema:"description=Optional model override for the subagent"`
// SystemPrompt optionally provides domain-specific instructions.
SystemPrompt string `json:"system_prompt,omitempty" jsonschema:"description=Optional system prompt for domain-specific guidance"`
// TimeoutSeconds limits execution time (default: 300 = 5 minutes).
TimeoutSeconds int `json:"timeout_seconds,omitempty" jsonschema:"description=Maximum execution time in seconds (default: 300)"`
}
```
#### Registration
Add to `internal/tools/core.go`:
```go
func CoreTools() []fantasy.AgentTool {
return []fantasy.AgentTool{
BashTool(),
ReadTool(),
WriteTool(),
EditTool(),
// ... existing tools ...
SubagentTool(), // NEW
}
}
```
### Phase 3: Session Hierarchy (Medium)
Add optional parent-child session linking so subagent conversations can be:
- Persisted and resumed
- Queried from the parent session
- Visualized in session tree views
#### CLI Flag
```
--parent-session <id> Link this session to a parent session ID
```
#### Session Header Extension
```go
// SessionHeader in internal/session/entry.go
type SessionHeader struct {
Type EntryType `json:"type"`
ID string `json:"id"`
Version string `json:"version"`
Cwd string `json:"cwd"`
Timestamp time.Time `json:"timestamp"`
ParentSession string `json:"parent_session,omitempty"` // existing
// NEW: For subagent sessions
ParentSessionID string `json:"parent_session_id,omitempty"` // UUID of parent
SubagentTask string `json:"subagent_task,omitempty"` // Original task prompt
}
```
#### Query Functions
```go
// ListChildSessions returns all sessions that have this session as their parent.
func ListChildSessions(parentID string) ([]SessionInfo, error) {
allSessions, err := ListAllSessions()
if err != nil {
return nil, err
}
var children []SessionInfo
for _, s := range allSessions {
if s.ParentSessionID == parentID {
children = append(children, s)
}
}
return children, nil
}
```
#### Subagent Config Extension
```go
type SubagentConfig struct {
// ... existing fields ...
// ParentSessionID links the subagent's session to the parent.
// When set, the subagent's session is persisted (not ephemeral)
// and can be queried/resumed later.
ParentSessionID string
}
```
### Phase 4: Provider Pooling (Medium)
Reduce overhead when spawning multiple subagents by reusing provider connections.
#### Provider Pool (`internal/models/pool.go`)
```go
package models
import (
"context"
"sync"
"time"
"charm.land/fantasy"
)
// ProviderPool manages reusable LLM provider instances.
type ProviderPool struct {
mu sync.RWMutex
providers map[string]*pooledProvider
ttl time.Duration
}
type pooledProvider struct {
model fantasy.LanguageModel
closer func() error
created time.Time
refs int
}
// NewProviderPool creates a provider pool with the given TTL for idle providers.
func NewProviderPool(ttl time.Duration) *ProviderPool {
p := &ProviderPool{
providers: make(map[string]*pooledProvider),
ttl: ttl,
}
go p.cleanupLoop()
return p
}
// Get returns a provider for the model string, creating one if needed.
func (p *ProviderPool) Get(ctx context.Context, modelString string) (fantasy.LanguageModel, func(), error) {
p.mu.Lock()
defer p.mu.Unlock()
if pp, ok := p.providers[modelString]; ok {
pp.refs++
return pp.model, func() { p.release(modelString) }, nil
}
// Create new provider.
config := &ProviderConfig{ModelString: modelString}
result, err := CreateProvider(ctx, config)
if err != nil {
return nil, nil, err
}
p.providers[modelString] = &pooledProvider{
model: result.Model,
closer: result.Closer.Close,
created: time.Now(),
refs: 1,
}
return result.Model, func() { p.release(modelString) }, nil
}
func (p *ProviderPool) release(modelString string) {
p.mu.Lock()
defer p.mu.Unlock()
if pp, ok := p.providers[modelString]; ok {
pp.refs--
}
}
func (p *ProviderPool) cleanupLoop() {
ticker := time.NewTicker(p.ttl / 2)
defer ticker.Stop()
for range ticker.C {
p.mu.Lock()
now := time.Now()
for key, pp := range p.providers {
if pp.refs == 0 && now.Sub(pp.created) > p.ttl {
pp.closer()
delete(p.providers, key)
}
}
p.mu.Unlock()
}
}
```
### Phase 5: Advanced Context Sharing (Hard - Future)
> **Note:** This phase requires significant design work and is deferred to a future proposal.
Options to explore:
1. **Selective History Injection** - Pass compressed parent history to subagent
2. **Shared Memory** - IPC mechanism for context sharing between processes
3. **In-Process Subagents** - Run subagents as goroutines sharing the same provider
## Implementation Plan
| Phase | Component | Effort | Files |
|-------|-----------|--------|-------|
| 1 | SDK API | 2-3 days | `internal/extensions/subagent.go`, `internal/extensions/api.go`, `ext/types.go` |
| 2 | Core Tool | 1 day | `internal/tools/subagent.go`, `internal/tools/core.go` |
| 3 | Session Hierarchy | 2-3 days | `internal/session/entry.go`, `internal/session/store.go`, `cmd/root.go` |
| 4 | Provider Pooling | 2-3 days | `internal/models/pool.go`, `internal/agent/agent.go` |
| 5 | Context Sharing | 1-2 weeks | TBD (future proposal) |
**Total for production-ready (Phases 1-4):** ~1-1.5 weeks
## Migration Path
Existing extensions (`subagent-widget.go`, `kit-kit.go`) can migrate incrementally:
```go
// Before: ~150 lines of subprocess management
cmd := exec.Command(kitBinary, args...)
stdout, _ := cmd.StdoutPipe()
stderr, _ := cmd.StderrPipe()
// ... pipe reading, JSON parsing, process lifecycle ...
// After: 10 lines
handle, _, _ := ctx.SpawnSubagent(ext.SubagentConfig{
Prompt: task,
OnOutput: func(chunk string) { state.appendChunk(chunk) },
OnComplete: func(result ext.SubagentResult) {
state.setStatus("done")
ctx.SendMessage(fmt.Sprintf("Subagent finished: %s", result.Response))
},
})
```
## Testing Strategy
1. **Unit Tests** - `internal/extensions/subagent_test.go`
- Config validation
- Timeout handling
- JSON parsing edge cases
2. **Integration Tests** - `internal/extensions/subagent_integration_test.go`
- Actual subprocess spawning
- Cancellation propagation
- Multiple concurrent subagents
3. **Extension Tests** - Update `subagent-widget.go` to use new API
- Verify backward compatibility
- Measure code reduction
4. **tmux TUI Tests** - Per AGENTS.md testing pattern
```bash
tmux new-session -d -s subtest "output/kit -e examples/extensions/subagent-widget.go"
tmux send-keys -t subtest '/sub research authentication patterns' Enter
sleep 10
tmux capture-pane -t subtest -p | grep -q "Subagent #1"
```
## Security Considerations
1. **Process Isolation** - Subagents run as separate processes (existing behavior)
2. **No Extension Inheritance** - `--no-extensions` prevents recursive loading
3. **Timeout Enforcement** - Default 5-minute timeout prevents runaway processes
4. **Resource Limits** - Consider adding `--max-steps` to subagent invocations
## Open Questions
1. **Should subagents inherit MCP servers?** Currently no (`--no-extensions`). Adding `--inherit-mcp` flag could enable tool sharing.
2. **Result size limits?** Current extensions truncate at 8-16KB. Should the SDK have a configurable limit?
3. **Parallel execution limits?** Should there be a max concurrent subagents setting to prevent resource exhaustion?
4. **Billing/quota tracking?** For teams with usage limits, should parent sessions aggregate subagent token usage?
## Appendix: Existing Extension Patterns
### subagent-widget.go Key Patterns
```go
// Process lifecycle management
type subState struct {
Proc *os.Process // active process for killing
Status string // "running", "done", "error"
History string // conversation history for /subcont
}
// Subprocess invocation
args := []string{"--json", "--no-session", "--no-extensions", prompt}
cmd := exec.Command(kitBinary, args...)
cmd.Start()
// Dual pipe reading
go func() { /* stderr -> live widget updates */ }()
go func() { /* stdout -> JSON result */ }()
// Result delivery
ctx.SendMessage(fmt.Sprintf("Subagent #%d finished: %s", id, result))
```
### kit-kit.go Key Patterns
```go
// Parallel expert execution
var wg sync.WaitGroup
for _, q := range queries {
wg.Add(1)
go func(expert, question string) {
defer wg.Done()
out, code, elapsed := queryExpert(expert, question)
// ...
}(q.Expert, q.Question)
}
wg.Wait()
// Domain-specific system prompts from files
def := parseAgentFile(filepath.Join(dir, "ext-expert.md"))
args = append(args, "--system-prompt", tmpFile.Name())
```