Compare commits

..

3 Commits

Author SHA1 Message Date
Ed Zynda 61408ed490 fix(sdk): infer ToolResponse.Type for binary data in NewTool/NewParallelTool
- Infer Type="image" for image/* MIME types and Type="media" for all
  other binary content so the downstream framework creates a media
  content block instead of silently discarding Data bytes (#17)
- Extract shared toolOutputToResponse() helper to eliminate duplication
- Add ImageResult() and MediaResult() convenience constructors
- Add LLMToolCall and LLMToolResponse type aliases so SDK consumers
  can call Tool.Run() without importing the underlying framework
- Add 6 regression tests covering image, media, and text responses

Closes #17
2026-04-22 16:58:07 +03:00
Ed Zynda 3cfb6437f9 perf(session,ui): reduce syscalls, allocations, and subprocess spam
- Buffer session JSONL writes with bufio.Writer, flush at sync points;
  ForkToNewSession and AddLLMMessages now batch N entries into ~1 syscall
- Cache lipgloss styles in style.CachedStyles, lazily built and
  invalidated on SetTheme; eliminates ~15 NewStyle() calls per frame in
  hot render paths (reasoning blocks, spinner, tool headers, margins)
- Cache git ls-files results for @file suggestions with 3s TTL; typing
  @filename no longer spawns 3 subprocesses per keystroke
- Use strings.Builder for StreamingMessageItem.content; eliminates O(n²)
  string copying during LLM response streaming
2026-04-22 16:48:17 +03:00
Ed Zynda d33ad4028b fix(kit): enable streaming for subagent child instances
- Set Streaming: true in subagent childOpts to prevent
  viper.Set("stream", false) from polluting global state
- Without this, concurrent subagents and the parent could read
  stale stream=false from viper, causing provider-level issues
  (e.g. Anthropic non-streaming timeouts with extended thinking)
2026-04-22 13:06:37 +03:00
14 changed files with 461 additions and 56 deletions
+22 -1
View File
@@ -646,7 +646,28 @@ host, _ := kit.New(ctx, &kit.Options{
})
```
Use `kit.NewParallelTool` for tools safe to run concurrently. See the [SDK docs](/sdk/overview) for full details on struct tags, `ToolOutput` fields, and `ToolCallIDFromContext`.
Use `kit.NewParallelTool` for tools safe to run concurrently. Binary data (images, audio, etc.) in `ToolOutput.Data` is automatically forwarded to the LLM when `MediaType` is set. See the [SDK docs](/sdk/overview) for full details on struct tags, `ToolOutput` fields, and `ToolCallIDFromContext`.
#### Return Helpers
| Helper | Description |
| --- | --- |
| `kit.TextResult(content)` | Successful text result |
| `kit.ErrorResult(content)` | Error result (LLM sees it as a tool error) |
| `kit.ImageResult(content, data, mediaType)` | Image result with binary data (e.g. `"image/png"`) |
| `kit.MediaResult(content, data, mediaType)` | Non-image media result (e.g. `"audio/mpeg"`) |
#### ToolOutput Fields
```go
kit.ToolOutput{
Content: "result text", // text returned to the LLM
IsError: false, // true = LLM sees this as an error
Data: pngBytes, // optional binary data (images, audio)
MediaType: "image/png", // MIME type for binary Data
Metadata: map[string]any{}, // opaque metadata for hooks/UI (not sent to LLM)
}
```
### With Callbacks
+80 -4
View File
@@ -63,6 +63,11 @@ type TreeManager struct {
// file is the open file handle for appending entries. Nil for in-memory.
file *os.File
// writer is a buffered writer wrapping file. Writes go through this
// buffer and are flushed to disk at explicit sync points (after each
// public Append* call, in Close, etc.) to reduce syscall overhead.
writer *bufio.Writer
}
// --- Constructors ---
@@ -105,11 +110,16 @@ func CreateTreeSession(cwd string) (*TreeManager, error) {
return nil, fmt.Errorf("failed to create session file: %w", err)
}
tm.file = f
tm.writer = bufio.NewWriter(f)
if err := tm.writeEntry(&header); err != nil {
_ = f.Close()
return nil, fmt.Errorf("failed to write session header: %w", err)
}
if err := tm.flushLocked(); err != nil {
_ = f.Close()
return nil, fmt.Errorf("failed to flush session header: %w", err)
}
return tm, nil
}
@@ -150,6 +160,7 @@ func (tm *TreeManager) ForkToNewSession(cwd string, targetID string) (*TreeManag
return nil, fmt.Errorf("failed to recreate session file: %w", err)
}
newTm.file = f
newTm.writer = bufio.NewWriter(f)
if err := newTm.writeEntry(&newTm.header); err != nil {
_ = f.Close()
@@ -289,6 +300,12 @@ func (tm *TreeManager) ForkToNewSession(cwd string, targetID string) (*TreeManag
}
}
// Flush all buffered writes from the fork in a single syscall.
if err := newTm.flushLocked(); err != nil {
_ = f.Close()
return nil, fmt.Errorf("failed to flush forked session: %w", err)
}
// Set the leaf to the last entry in the new session.
newTm.leafID = prevNewID
@@ -374,6 +391,7 @@ func OpenTreeSession(path string) (*TreeManager, error) {
return nil, fmt.Errorf("failed to open session file for append: %w", err)
}
tm.file = f
tm.writer = bufio.NewWriter(f)
return tm, nil
}
@@ -427,6 +445,9 @@ func (tm *TreeManager) AppendMessage(msg message.Message) (string, error) {
if err := tm.appendAndPersist(entry); err != nil {
return "", err
}
if err := tm.flushLocked(); err != nil {
return "", fmt.Errorf("failed to flush message: %w", err)
}
tm.leafID = entry.ID
return entry.ID, nil
@@ -451,6 +472,9 @@ func (tm *TreeManager) AppendModelChange(provider, modelID string) (string, erro
if err := tm.appendAndPersist(entry); err != nil {
return "", err
}
if err := tm.flushLocked(); err != nil {
return "", fmt.Errorf("failed to flush model change: %w", err)
}
tm.leafID = entry.ID
return entry.ID, nil
@@ -465,6 +489,9 @@ func (tm *TreeManager) AppendBranchSummary(fromID, summary string) (string, erro
if err := tm.appendAndPersist(entry); err != nil {
return "", err
}
if err := tm.flushLocked(); err != nil {
return "", fmt.Errorf("failed to flush branch summary: %w", err)
}
tm.leafID = entry.ID
return entry.ID, nil
@@ -479,6 +506,9 @@ func (tm *TreeManager) AppendLabel(targetID, label string) (string, error) {
if err := tm.appendAndPersist(entry); err != nil {
return "", err
}
if err := tm.flushLocked(); err != nil {
return "", fmt.Errorf("failed to flush label: %w", err)
}
tm.labels[targetID] = label
tm.leafID = entry.ID
@@ -494,6 +524,9 @@ func (tm *TreeManager) AppendSessionInfo(name string) (string, error) {
if err := tm.appendAndPersist(entry); err != nil {
return "", err
}
if err := tm.flushLocked(); err != nil {
return "", fmt.Errorf("failed to flush session info: %w", err)
}
tm.sessionName = name
tm.leafID = entry.ID
@@ -510,6 +543,9 @@ func (tm *TreeManager) AppendExtensionData(extType, data string) (string, error)
if err := tm.appendAndPersist(entry); err != nil {
return "", err
}
if err := tm.flushLocked(); err != nil {
return "", fmt.Errorf("failed to flush extension data: %w", err)
}
tm.leafID = entry.ID
return entry.ID, nil
@@ -541,6 +577,9 @@ func (tm *TreeManager) AppendCompaction(summary, firstKeptEntryID string, tokens
if err := tm.appendAndPersist(entry); err != nil {
return "", err
}
if err := tm.flushLocked(); err != nil {
return "", fmt.Errorf("failed to flush compaction: %w", err)
}
tm.leafID = entry.ID
return entry.ID, nil
@@ -926,11 +965,31 @@ func (tm *TreeManager) IsEmpty() bool {
return tm.MessageCount() == 0
}
// Close closes the underlying file handle.
// Flush writes any buffered data to the underlying file.
// It holds tm.mu for the duration of the call and delegates to flushLocked,
// so it returns nil without doing anything when no buffered writer is attached.
func (tm *TreeManager) Flush() error {
tm.mu.Lock()
defer tm.mu.Unlock()
return tm.flushLocked()
}
// flushLocked drains the buffered writer into the file it wraps and reports
// any write error from that flush. When no writer is attached there is
// nothing to drain and nil is returned. Caller must hold the lock.
func (tm *TreeManager) flushLocked() error {
	if tm.writer == nil {
		return nil
	}
	return tm.writer.Flush()
}
// Close flushes any buffered writes and closes the underlying file handle.
func (tm *TreeManager) Close() error {
tm.mu.Lock()
defer tm.mu.Unlock()
if tm.file != nil {
// Flush buffered data before closing.
if tm.writer != nil {
_ = tm.writer.Flush()
tm.writer = nil
}
err := tm.file.Close()
tm.file = nil
return err
@@ -1090,13 +1149,22 @@ func (tm *TreeManager) GetLastCompaction() *CompactionEntry {
// AddLLMMessages appends multiple LLM messages as entries. This is
// used when syncing from the agent's ConversationMessages after a step.
// All entries are buffered and flushed to disk in a single batch.
func (tm *TreeManager) AddLLMMessages(msgs []fantasy.Message) error {
tm.mu.Lock()
defer tm.mu.Unlock()
for _, msg := range msgs {
if _, err := tm.AppendLLMMessage(msg); err != nil {
entry, err := NewMessageEntry(tm.leafID, message.FromLLMMessage(msg))
if err != nil {
return err
}
if err := tm.appendAndPersist(entry); err != nil {
return err
}
tm.leafID = entry.ID
}
return nil
return tm.flushLocked()
}
// Deprecated: Use AddLLMMessages instead.
@@ -1148,12 +1216,20 @@ func (tm *TreeManager) appendAndPersist(entry any) error {
return nil
}
// writeEntry serializes an entry and appends it as a line to the file.
// writeEntry serializes an entry and appends it to the buffered writer.
// The data is not flushed to disk until flushLocked is called.
func (tm *TreeManager) writeEntry(entry any) error {
data, err := json.Marshal(entry)
if err != nil {
return fmt.Errorf("failed to marshal entry: %w", err)
}
if tm.writer != nil {
if _, err := tm.writer.Write(data); err != nil {
return err
}
return tm.writer.WriteByte('\n')
}
// Fallback for direct file writes (shouldn't happen in normal flow).
data = append(data, '\n')
_, err = tm.file.Write(data)
return err
+48 -1
View File
@@ -6,6 +6,8 @@ import (
"path/filepath"
"sort"
"strings"
"sync"
"time"
)
// FileSuggestion represents a single file, directory, or MCP resource
@@ -31,6 +33,51 @@ type FileSuggestion struct {
// maxFileSuggestions is the maximum number of file suggestions returned.
const maxFileSuggestions = 20
// fileListCache holds the most recent listFiles() result to avoid re-running
// git subprocesses on every keystroke during @file completion.
// It is a single-entry cache rather than a map: a lookup only hits when both
// the stored dir and cwd match the request and the entry has not expired, and
// a lookup for any other pair replaces the stored entry.
var fileListCache struct {
mu sync.Mutex // held by getCachedFileList while reading or replacing the fields below
dir string // searchDir that produced the cached entries
cwd string // cwd used for the git query
entries []FileSuggestion // cached file list
expireAt time.Time // when the cache entry expires
}
// fileListCacheTTL controls how long a cached file list stays valid.
// During rapid typing the list is reused; after the TTL a fresh git
// ls-files is executed so newly created files become visible.
const fileListCacheTTL = 3 * time.Second
// getCachedFileList returns the file list for the (searchDir, cwd) pair,
// serving it from the short-lived package cache when possible so that
// @file autocompletion does not spawn subprocesses for every keystroke.
//
// The returned slice is always a fresh copy of the cached entries, so the
// caller may modify it (e.g. prepend baseDir) without affecting the cache.
func getCachedFileList(searchDir, cwd string) []FileSuggestion {
	fileListCache.mu.Lock()
	defer fileListCache.mu.Unlock()

	now := time.Now()
	fresh := fileListCache.dir == searchDir &&
		fileListCache.cwd == cwd &&
		now.Before(fileListCache.expireAt)

	if !fresh {
		// Miss or expired entry: do the real (potentially expensive) lookup
		// and replace the single cached entry with its result.
		fileListCache.entries = listFiles(searchDir, cwd)
		fileListCache.dir = searchDir
		fileListCache.cwd = cwd
		fileListCache.expireAt = now.Add(fileListCacheTTL)
	}

	// Copy out so the cached slice is never handed to a caller directly.
	out := make([]FileSuggestion, len(fileListCache.entries))
	copy(out, fileListCache.entries)
	return out
}
// ExtractAtPrefix checks the current line for an @-file trigger at cursorCol.
// It returns:
// - hasAt: true if a valid @ trigger was found
@@ -99,7 +146,7 @@ func GetFileSuggestions(prefix string, cwd string) []FileSuggestion {
}
}
files := listFiles(searchDir, cwd)
files := getCachedFileList(searchDir, cwd)
if len(files) == 0 {
return nil
}
+6 -8
View File
@@ -109,8 +109,8 @@ func (m *TextMessageItem) renderContent(width int) string {
// It accumulates content chunks and re-renders on each update for live display.
type StreamingMessageItem struct {
id string
role string // "assistant" or "reasoning"
content string // Accumulated streaming content
role string // "assistant" or "reasoning"
content strings.Builder // Accumulated streaming content
timestamp time.Time
startTime time.Time // When streaming started (for live duration counter)
modelName string
@@ -156,10 +156,10 @@ func (s *StreamingMessageItem) Render(width int) string {
durationMs = time.Since(s.startTime).Milliseconds()
}
ty := createTypography(style.GetTheme())
rendered = render.ReasoningBlock(s.content, durationMs, width, ty, style.GetTheme())
rendered = render.ReasoningBlock(s.content.String(), durationMs, width, ty, style.GetTheme())
} else {
// Render as assistant message
rendered = render.AssistantBlock(s.content, width, style.GetTheme())
rendered = render.AssistantBlock(s.content.String(), width, style.GetTheme())
}
// Cache and return (but reasoning is never cached due to live duration)
@@ -187,7 +187,7 @@ func (s *StreamingMessageItem) Height() int {
// AppendChunk adds a content chunk and invalidates the render cache.
func (s *StreamingMessageItem) AppendChunk(chunk string) {
s.content += chunk
s.content.WriteString(chunk)
s.cachedWidth = 0 // Invalidate cache
}
@@ -243,9 +243,7 @@ func (m *StreamingBashOutputItem) Render(width int) string {
// Header with command
if m.command != "" {
headerStyle := lipgloss.NewStyle().
Foreground(theme.Muted).
Italic(true)
headerStyle := style.GetCachedStyles().BashHeader
parts = append(parts, headerStyle.Render(fmt.Sprintf("▸ %s", m.command)))
}
+1 -1
View File
@@ -338,7 +338,7 @@ func (r *MessageRenderer) RenderToolMessage(toolName, toolArgs, toolResult strin
// Build the content: icon + name + params on first line, then body
headerLine := styledIcon + " " + styledName
if params != "" {
headerLine += " " + lipgloss.NewStyle().Foreground(theme.Muted).Render(params)
headerLine += " " + style.GetCachedStyles().ToolMuted.Render(params)
}
// Get body content
+7 -7
View File
@@ -45,7 +45,7 @@ func UserBlock(content string, width int, ty *herald.Typography, theme style.The
// HighlightFileTokens wraps @file tokens in the given text with the theme
// accent color so they stand out visually in rendered user messages.
func HighlightFileTokens(text string, theme style.Theme) string {
accentStyle := lipgloss.NewStyle().Foreground(theme.Accent).Bold(true)
accentStyle := style.GetCachedStyles().FileTokenAccent
return fileTokenPattern.ReplaceAllStringFunc(text, func(token string) string {
return accentStyle.Render(token)
})
@@ -75,8 +75,8 @@ func ReasoningBlock(content string, duration int64, width int, ty *herald.Typogr
if width > 4 {
contentStr = wrapText(contentStr, width-4)
}
mutedStyle := lipgloss.NewStyle().Foreground(theme.Muted)
contentRendered := mutedStyle.Render(ty.Italic(contentStr))
cs := style.GetCachedStyles()
contentRendered := cs.Muted.Render(ty.Italic(contentStr))
// Build label based on duration
if duration > 0 {
@@ -86,14 +86,14 @@ func ReasoningBlock(content string, duration int64, width int, ty *herald.Typogr
} else {
durationStr = fmt.Sprintf("%.1fs", float64(duration)/1000)
}
labelPart := lipgloss.NewStyle().Foreground(theme.VeryMuted).Render("Thought for ")
durationPart := lipgloss.NewStyle().Foreground(theme.Accent).Render(durationStr)
labelPart := cs.VeryMuted.Render("Thought for ")
durationPart := cs.Accent.Render(durationStr)
label := labelPart + durationPart
rendered := contentRendered + "\n" + label
return styleMarginBottom(theme, rendered)
}
label := lipgloss.NewStyle().Foreground(theme.VeryMuted).Render("Thought")
label := cs.VeryMuted.Render("Thought")
rendered := contentRendered + "\n" + label
return styleMarginBottom(theme, rendered)
@@ -194,7 +194,7 @@ func ToolBlock(displayName, params, body string, isError bool, width int, ty *he
// styleMarginBottom applies a 1-line margin bottom using the theme.
func styleMarginBottom(theme style.Theme, content string) string {
return lipgloss.NewStyle().MarginBottom(1).Render(content)
return style.GetCachedStyles().MarginBottom1.Render(content)
}
// wrapText soft-wraps a string to the given width using lipgloss, which is
+9 -11
View File
@@ -21,12 +21,11 @@ func knightRiderFrames() []string {
const numDots = 8
const dot = "▪"
theme := style.GetTheme()
bright := lipgloss.NewStyle().Foreground(theme.Primary)
med := lipgloss.NewStyle().Foreground(theme.Muted)
dim := lipgloss.NewStyle().Foreground(theme.VeryMuted)
off := lipgloss.NewStyle().Foreground(theme.MutedBorder)
cs := style.GetCachedStyles()
bright := cs.SpinnerBright
med := cs.SpinnerMed
dim := cs.SpinnerDim
off := cs.SpinnerOff
// Scanner bounces: 0→7→0
positions := make([]int, 0, 2*numDots-2)
@@ -476,9 +475,8 @@ func (s *StreamComponent) renderReasoningBlock(reasoning string) string {
if s.width > 4 {
content = lipgloss.NewStyle().Width(s.width - 4).Render(content)
}
theme := GetTheme()
mutedStyle := lipgloss.NewStyle().Foreground(theme.Muted)
parts = append(parts, mutedStyle.Render(s.ty.Italic(content)))
cs := style.GetCachedStyles()
parts = append(parts, cs.Muted.Render(s.ty.Italic(content)))
// Duration footer with VeryMuted label and Accent duration.
var duration time.Duration
@@ -494,8 +492,8 @@ func (s *StreamComponent) renderReasoningBlock(reasoning string) string {
} else {
durationStr = fmt.Sprintf("%.1fs", duration.Seconds())
}
label := lipgloss.NewStyle().Foreground(theme.VeryMuted).Render("Thought for ")
durationStyled := lipgloss.NewStyle().Foreground(theme.Accent).Render(durationStr)
label := cs.VeryMuted.Render("Thought for ")
durationStyled := cs.Accent.Render(durationStr)
parts = append(parts, label+durationStyled)
}
+64
View File
@@ -40,6 +40,70 @@ func GetTheme() Theme {
func SetTheme(theme Theme) {
currentTheme = theme
// Drop every cache that was built from the previous theme's colors.
// styleCache is rebuilt by GetCachedStyles the next time it is called.
markdownTypographyCache = nil // invalidate cached renderer; colors may have changed
styleCache = nil // invalidate cached styles; colors may have changed
}
// CachedStyles holds pre-built lipgloss styles that are reused across
// render frames. Invalidated by SetTheme, lazily rebuilt on next access.
// Only accessed from BubbleTea's single-threaded Update/View cycle.
//
// The cache lives in the package-level styleCache pointer, which is read and
// replaced without any locking, so it must not be used from several
// goroutines at once. Each field comment records how GetCachedStyles builds
// that style; the section comments name the file that uses the group.
type CachedStyles struct {
// render/blocks.go
FileTokenAccent lipgloss.Style // Foreground(Accent).Bold(true)
Muted lipgloss.Style // Foreground(Muted)
VeryMuted lipgloss.Style // Foreground(VeryMuted)
Accent lipgloss.Style // Foreground(Accent)
MarginBottom1 lipgloss.Style // MarginBottom(1); uses no theme color
// stream.go - spinner phases
SpinnerBright lipgloss.Style // Foreground(Primary)
SpinnerMed lipgloss.Style // Foreground(Muted)
SpinnerDim lipgloss.Style // Foreground(VeryMuted)
SpinnerOff lipgloss.Style // Foreground(MutedBorder)
// message_items.go - bash output
BashHeader lipgloss.Style // Foreground(Muted).Italic(true)
BashStderr lipgloss.Style // Foreground(Error)
// render/blocks.go - tool block
ToolSuccess lipgloss.Style // Foreground(Success)
ToolError lipgloss.Style // Foreground(Error)
ToolInfo lipgloss.Style // Foreground(Info).Bold(true)
ToolMuted lipgloss.Style // Foreground(Muted)
// common
ErrorFg lipgloss.Style // Foreground(Error)
TextBold lipgloss.Style // Foreground(Text).Bold(true)
}
var styleCache *CachedStyles
// GetCachedStyles returns the shared style cache. The cache is built from
// the current theme the first time it is requested after start-up or after
// SetTheme has cleared it; every later call returns the same pointer.
func GetCachedStyles() *CachedStyles {
	if styleCache == nil {
		t := GetTheme()
		styleCache = &CachedStyles{
			// Block rendering.
			FileTokenAccent: lipgloss.NewStyle().Foreground(t.Accent).Bold(true),
			Muted:           lipgloss.NewStyle().Foreground(t.Muted),
			VeryMuted:       lipgloss.NewStyle().Foreground(t.VeryMuted),
			Accent:          lipgloss.NewStyle().Foreground(t.Accent),
			MarginBottom1:   lipgloss.NewStyle().MarginBottom(1),

			// Spinner phases.
			SpinnerBright: lipgloss.NewStyle().Foreground(t.Primary),
			SpinnerMed:    lipgloss.NewStyle().Foreground(t.Muted),
			SpinnerDim:    lipgloss.NewStyle().Foreground(t.VeryMuted),
			SpinnerOff:    lipgloss.NewStyle().Foreground(t.MutedBorder),

			// Bash output.
			BashHeader: lipgloss.NewStyle().Foreground(t.Muted).Italic(true),
			BashStderr: lipgloss.NewStyle().Foreground(t.Error),

			// Tool blocks.
			ToolSuccess: lipgloss.NewStyle().Foreground(t.Success),
			ToolError:   lipgloss.NewStyle().Foreground(t.Error),
			ToolInfo:    lipgloss.NewStyle().Foreground(t.Info).Bold(true),
			ToolMuted:   lipgloss.NewStyle().Foreground(t.Muted),

			// Common.
			ErrorFg:  lipgloss.NewStyle().Foreground(t.Error),
			TextBold: lipgloss.NewStyle().Foreground(t.Text).Bold(true),
		}
	}
	return styleCache
}
// MarkdownThemeColors defines colors for markdown rendering and syntax highlighting.
+7
View File
@@ -1781,12 +1781,19 @@ func (m *Kit) Subagent(ctx context.Context, cfg SubagentConfig) (*SubagentResult
// Create child Kit instance. Pass the parent's loaded MCP config to
// avoid re-reading viper (which races with concurrent subagent spawns).
// Streaming must be explicitly enabled — Options.Streaming defaults to
// false, and New() unconditionally writes viper.Set("stream", opts.Streaming).
// Without this, the subagent would (a) pollute viper global state for
// other concurrent callers and (b) potentially hit provider-level
// differences (e.g. Anthropic non-streaming timeouts with extended
// thinking).
childOpts := &Options{
Model: model,
SystemPrompt: systemPrompt,
Tools: tools,
NoSession: cfg.NoSession,
Quiet: true,
Streaming: true,
MCPConfig: m.mcpConfig,
}
child, err := New(ctx, childOpts)
+52 -22
View File
@@ -2,6 +2,7 @@ package kit
import (
"context"
"strings"
"charm.land/fantasy"
@@ -52,6 +53,22 @@ func ErrorResult(content string) ToolOutput {
return ToolOutput{Content: content, IsError: true}
}
// ImageResult builds a [ToolOutput] carrying an image for the LLM.
// data holds the raw image bytes and mediaType is their MIME type
// (e.g. "image/png", "image/jpeg"). content is optional accompanying text
// that the LLM sees alongside the image.
//
// The constructor only fills Content, Data and MediaType; it does not
// validate mediaType or tag the output as an image itself.
func ImageResult(content string, data []byte, mediaType string) ToolOutput {
	var out ToolOutput
	out.Content = content
	out.Data = data
	out.MediaType = mediaType
	return out
}
// MediaResult builds a [ToolOutput] carrying non-image binary media
// (e.g. audio, video) for the LLM. data holds the raw bytes and mediaType
// is their MIME type (e.g. "audio/wav", "video/mp4"). content is optional
// accompanying text.
//
// The constructor only fills Content, Data and MediaType; it does not
// validate mediaType or tag the output as media itself.
func MediaResult(content string, data []byte, mediaType string) ToolOutput {
	var out ToolOutput
	out.Content = content
	out.Data = data
	out.MediaType = mediaType
	return out
}
// toolCallIDKey is the context key for the tool call ID.
type toolCallIDKey struct{}
@@ -63,9 +80,35 @@ func ToolCallIDFromContext(ctx context.Context) string {
return s
}
// toolOutputToResponse converts a [ToolOutput] into the underlying
// framework's ToolResponse, inferring the response Type from Data/MediaType
// so that binary content (images, audio, etc.) is forwarded to the LLM
// instead of being silently dropped.
//
// Type is only set when Data is non-empty and MediaType is non-empty:
// "image" for any image/* MIME type, "media" for everything else. Text-only
// results keep the zero-value Type. Metadata, when present, is attached via
// fantasy.WithResponseMetadata.
func toolOutputToResponse(result ToolOutput) fantasy.ToolResponse {
	resp := fantasy.ToolResponse{
		Content:   result.Content,
		IsError:   result.IsError,
		Data:      result.Data,
		MediaType: result.MediaType,
	}

	// Infer response type from binary data so the downstream framework
	// creates a media content block instead of a plain-text one.
	if len(result.Data) > 0 && result.MediaType != "" {
		// MIME type and subtype names are case-insensitive, so compare on a
		// lower-cased copy; otherwise "Image/PNG" would be treated as
		// generic media. MediaType itself is forwarded unchanged.
		if strings.HasPrefix(strings.ToLower(result.MediaType), "image/") {
			resp.Type = "image"
		} else {
			resp.Type = "media"
		}
	}

	if result.Metadata != nil {
		resp = fantasy.WithResponseMetadata(resp, result.Metadata)
	}
	return resp
}
// NewTool creates a custom [Tool] with automatic JSON schema generation from
// the TInput struct type. The handler receives a typed input (deserialized
// from the LLM's JSON arguments) and returns a [ToolResult].
// from the LLM's JSON arguments) and returns a [ToolOutput].
//
// Struct tags on TInput control the generated schema:
//
@@ -77,6 +120,11 @@ func ToolCallIDFromContext(ctx context.Context) string {
// The tool call ID is injected into the context and can be retrieved with
// [ToolCallIDFromContext].
//
// Binary results: When [ToolOutput.Data] and [ToolOutput.MediaType] are set,
// the response type is automatically inferred so the LLM receives the binary
// content (e.g. an image) instead of only the text. Use [ImageResult] or
// [MediaResult] for convenience.
//
// Example:
//
// type WeatherInput struct {
@@ -84,7 +132,7 @@ func ToolCallIDFromContext(ctx context.Context) string {
// }
//
// tool := kit.NewTool("get_weather", "Get weather for a city",
// func(ctx context.Context, input WeatherInput) (kit.ToolResult, error) {
// func(ctx context.Context, input WeatherInput) (kit.ToolOutput, error) {
// return kit.TextResult("72°F, sunny in " + input.City), nil
// },
// )
@@ -96,16 +144,7 @@ func NewTool[TInput any](name, description string, fn func(ctx context.Context,
if err != nil {
return fantasy.NewTextErrorResponse(err.Error()), nil
}
resp := fantasy.ToolResponse{
Content: result.Content,
IsError: result.IsError,
Data: result.Data,
MediaType: result.MediaType,
}
if result.Metadata != nil {
resp = fantasy.WithResponseMetadata(resp, result.Metadata)
}
return resp, nil
return toolOutputToResponse(result), nil
},
)
}
@@ -121,16 +160,7 @@ func NewParallelTool[TInput any](name, description string, fn func(ctx context.C
if err != nil {
return fantasy.NewTextErrorResponse(err.Error()), nil
}
resp := fantasy.ToolResponse{
Content: result.Content,
IsError: result.IsError,
Data: result.Data,
MediaType: result.MediaType,
}
if result.Metadata != nil {
resp = fantasy.WithResponseMetadata(resp, result.Metadata)
}
return resp, nil
return toolOutputToResponse(result), nil
},
)
}
+146
View File
@@ -117,3 +117,149 @@ func TestToolOutput_BinaryData(t *testing.T) {
t.Errorf("MediaType = %q, want %q", r.MediaType, "image/png")
}
}
// TestImageResult checks that ImageResult copies its arguments into the
// matching ToolOutput fields and leaves IsError unset.
func TestImageResult(t *testing.T) {
	const (
		wantContent   = "here is the image"
		wantMediaType = "image/png"
	)
	payload := []byte{0x89, 0x50, 0x4E, 0x47}

	got := kit.ImageResult(wantContent, payload, wantMediaType)

	if got.Content != wantContent {
		t.Errorf("Content = %q, want %q", got.Content, wantContent)
	}
	if n := len(got.Data); n != 4 {
		t.Errorf("Data len = %d, want 4", n)
	}
	if got.MediaType != wantMediaType {
		t.Errorf("MediaType = %q, want %q", got.MediaType, wantMediaType)
	}
	if got.IsError {
		t.Error("ImageResult should not set IsError")
	}
}
// TestMediaResult checks that MediaResult copies its arguments into the
// matching ToolOutput fields and leaves IsError unset.
func TestMediaResult(t *testing.T) {
	const (
		wantContent   = "audio clip"
		wantMediaType = "audio/mpeg"
	)
	payload := []byte{0xFF, 0xFB, 0x90, 0x00}

	got := kit.MediaResult(wantContent, payload, wantMediaType)

	if got.Content != wantContent {
		t.Errorf("Content = %q, want %q", got.Content, wantContent)
	}
	if n := len(got.Data); n != 4 {
		t.Errorf("Data len = %d, want 4", n)
	}
	if got.MediaType != wantMediaType {
		t.Errorf("MediaType = %q, want %q", got.MediaType, wantMediaType)
	}
	if got.IsError {
		t.Error("MediaResult should not set IsError")
	}
}
// TestNewTool_BinaryImageResponse is the regression test for issue #17: a
// tool built with NewTool that returns image bytes must produce a response
// whose Type is "image" and whose Data, MediaType and Content survive intact.
func TestNewTool_BinaryImageResponse(t *testing.T) {
	type Input struct {
		Path string `json:"path"`
	}

	const (
		wantType      = "image"
		wantMediaType = "image/png"
		wantContent   = "Here is the image"
	)
	pngMagic := []byte{0x89, 0x50, 0x4E, 0x47} // PNG magic bytes

	handler := func(ctx context.Context, input Input) (kit.ToolOutput, error) {
		return kit.ImageResult("Here is the image", pngMagic, "image/png"), nil
	}
	imageTool := kit.NewTool("read_image", "Read an image file", handler)

	// Invoke the tool directly and look at the raw response it hands back.
	call := kit.LLMToolCall{
		ID:    "call_1",
		Name:  "read_image",
		Input: `{"path": "test.png"}`,
	}
	got, err := imageTool.Run(context.Background(), call)
	if err != nil {
		t.Fatalf("Run() error: %v", err)
	}

	// Type must be "image" so the downstream framework builds a media
	// content block rather than dropping the binary payload.
	if got.Type != wantType {
		t.Errorf("ToolResponse.Type = %q, want %q", got.Type, wantType)
	}
	if n := len(got.Data); n != 4 {
		t.Errorf("ToolResponse.Data len = %d, want 4", n)
	}
	if got.MediaType != wantMediaType {
		t.Errorf("ToolResponse.MediaType = %q, want %q", got.MediaType, wantMediaType)
	}
	if got.Content != wantContent {
		t.Errorf("ToolResponse.Content = %q, want %q", got.Content, wantContent)
	}
}
// TestNewTool_BinaryMediaResponse checks that binary data with a non-image
// MIME type yields a response whose Type is "media".
func TestNewTool_BinaryMediaResponse(t *testing.T) {
	type Input struct{}

	const wantType = "media"

	handler := func(ctx context.Context, input Input) (kit.ToolOutput, error) {
		return kit.MediaResult("audio clip", []byte{0xFF, 0xFB}, "audio/mpeg"), nil
	}
	audioTool := kit.NewTool("get_audio", "Get audio", handler)

	call := kit.LLMToolCall{
		ID:    "call_2",
		Name:  "get_audio",
		Input: `{}`,
	}
	got, err := audioTool.Run(context.Background(), call)
	if err != nil {
		t.Fatalf("Run() error: %v", err)
	}

	if got.Type != wantType {
		t.Errorf("ToolResponse.Type = %q, want %q", got.Type, wantType)
	}
}
// TestNewTool_TextResponseTypeNotSet guards the pre-existing behavior for
// text-only results: no response Type is inferred for them.
func TestNewTool_TextResponseTypeNotSet(t *testing.T) {
	type Input struct{}

	handler := func(ctx context.Context, input Input) (kit.ToolOutput, error) {
		return kit.TextResult("hello"), nil
	}
	echoTool := kit.NewTool("echo", "Echo", handler)

	call := kit.LLMToolCall{ID: "call_3", Name: "echo", Input: `{}`}
	got, err := echoTool.Run(context.Background(), call)
	if err != nil {
		t.Fatalf("Run() error: %v", err)
	}

	// An empty Type is what the framework treats as plain text.
	if got.Type != "" {
		t.Errorf("ToolResponse.Type = %q, want empty string for text responses", got.Type)
	}
}
// TestNewParallelTool_BinaryImageResponse is the NewParallelTool counterpart
// of the NewTool image test: image bytes must yield a response Type of
// "image".
func TestNewParallelTool_BinaryImageResponse(t *testing.T) {
	type Input struct{}

	const wantType = "image"

	handler := func(ctx context.Context, input Input) (kit.ToolOutput, error) {
		return kit.ImageResult("snapshot", []byte{0xFF, 0xD8}, "image/jpeg"), nil
	}
	snapTool := kit.NewParallelTool("snap", "Take a snapshot", handler)

	call := kit.LLMToolCall{ID: "call_4", Name: "snap", Input: `{}`}
	got, err := snapTool.Run(context.Background(), call)
	if err != nil {
		t.Fatalf("Run() error: %v", err)
	}

	if got.Type != wantType {
		t.Errorf("ToolResponse.Type = %q, want %q", got.Type, wantType)
	}
}
+12
View File
@@ -157,6 +157,18 @@ type LLMTextPart = fantasy.TextPart
// LLMReasoningPart is a reasoning/chain-of-thought content part.
type LLMReasoningPart = fantasy.ReasoningPart
// LLMToolCall represents the raw tool invocation passed to a [Tool]'s Run
// method. It carries the call ID, tool name, and the JSON-encoded input
// arguments from the LLM. This is the execution-layer call object — distinct
// from [ToolCall] (a message content part).
//
// It is declared as a type alias, not a new type, so an LLMToolCall and a
// fantasy.ToolCall are the same type and can be used interchangeably.
type LLMToolCall = fantasy.ToolCall
// LLMToolResponse represents the raw response returned from a [Tool]'s Run
// method. Most SDK consumers should use [ToolOutput] with [NewTool] /
// [NewParallelTool] instead — this alias is provided for advanced use cases
// that need to call Tool.Run() directly (e.g. testing).
//
// It is declared as a type alias, not a new type, so an LLMToolResponse and a
// fantasy.ToolResponse are the same type and can be used interchangeably.
type LLMToolResponse = fantasy.ToolResponse
// LLMToolCallPart represents an LLM-initiated tool invocation within a message.
type LLMToolCallPart = fantasy.ToolCallPart
+4
View File
@@ -493,6 +493,8 @@ host, _ := kit.New(ctx, &kit.Options{
|----------|-------------|
| `kit.TextResult(content)` | Successful text result |
| `kit.ErrorResult(content)` | Error result (LLM sees it as a tool error) |
| `kit.ImageResult(content, data, mediaType)` | Image result with binary data (e.g. `"image/png"`) |
| `kit.MediaResult(content, data, mediaType)` | Non-image media result (e.g. `"audio/mpeg"`) |
**ToolOutput fields** (for advanced use):
@@ -1095,6 +1097,8 @@ kit.LLMUsage // {InputTokens, OutputTokens, TotalTokens, ReasoningTokens,
// CacheCreationTokens, CacheReadTokens}
kit.LLMResponse // {Content, FinishReason, Usage}
kit.LLMFilePart // {Filename, Data []byte, MediaType}
kit.LLMToolCall // {ID, Name, Input string} — execution-layer tool call (for Tool.Run)
kit.LLMToolResponse // {Type, Content, Data, MediaType, IsError, ...} — raw tool response
// Compaction types
kit.CompactionResult, kit.CompactionOptions
+3 -1
View File
@@ -101,8 +101,10 @@ Return values:
|--------|-------------|
| `kit.TextResult(s)` | Successful text result |
| `kit.ErrorResult(s)` | Error result (LLM sees it as a tool error) |
| `kit.ImageResult(s, data, mediaType)` | Image result with binary data (e.g. `"image/png"`) |
| `kit.MediaResult(s, data, mediaType)` | Non-image media result (e.g. `"audio/mpeg"`) |
For advanced use, return a `kit.ToolOutput` struct directly with `Data`, `MediaType`, and `Metadata` fields.
Binary data (images, audio, etc.) in `ToolOutput.Data` is automatically forwarded to the LLM when `MediaType` is set. For advanced use, return a `kit.ToolOutput` struct directly with `Data`, `MediaType`, and `Metadata` fields.
Use `kit.NewParallelTool` for tools that are safe to run concurrently. Use `kit.ToolCallIDFromContext(ctx)` to retrieve the LLM-assigned call ID for logging or tracing.