fix(sdk): infer ToolResponse.Type for binary data in NewTool/NewParallelTool

- Infer Type="image" for image/* MIME types and Type="media" for all other
  binary content so the downstream framework creates a media content block
  instead of silently discarding Data bytes (#17)
- Extract shared toolOutputToResponse() helper to eliminate duplication
- Add ImageResult() and MediaResult() convenience constructors
- Add LLMToolCall and LLMToolResponse type aliases so SDK consumers can call
  Tool.Run() without importing the underlying framework
- Add 6 regression tests covering image, media, and text responses

Closes #17
perf(session,ui): reduce syscalls, allocations, and subprocess spam
2026-06-19 22:00:47 +00:00 · 2026-04-22 16:58:07 +03:00 · 2026-04-22 16:48:17 +03:00 · 2026-04-22 13:06:37 +03:00
14 changed files with 461 additions and 56 deletions
@@ -646,7 +646,28 @@ host, _ := kit.New(ctx, &kit.Options{
 })
 ```

-Use `kit.NewParallelTool` for tools safe to run concurrently. See the [SDK docs](/sdk/overview) for full details on struct tags, `ToolOutput` fields, and `ToolCallIDFromContext`.
+Use `kit.NewParallelTool` for tools safe to run concurrently. Binary data (images, audio, etc.) in `ToolOutput.Data` is automatically forwarded to the LLM when `MediaType` is set. See the [SDK docs](/sdk/overview) for full details on struct tags, `ToolOutput` fields, and `ToolCallIDFromContext`.
+
+#### Return Helpers
+
+| Helper | Description |
+| --- | --- |
+| `kit.TextResult(content)` | Successful text result |
+| `kit.ErrorResult(content)` | Error result (LLM sees it as a tool error) |
+| `kit.ImageResult(content, data, mediaType)` | Image result with binary data (e.g. `"image/png"`) |
+| `kit.MediaResult(content, data, mediaType)` | Non-image media result (e.g. `"audio/mpeg"`) |
+
+#### ToolOutput Fields
+
+```go
+kit.ToolOutput{
+    Content:   "result text",     // text returned to the LLM
+    IsError:   false,             // true = LLM sees this as an error
+    Data:      pngBytes,          // optional binary data (images, audio)
+    MediaType: "image/png",       // MIME type for binary Data
+    Metadata:  map[string]any{},  // opaque metadata for hooks/UI (not sent to LLM)
+}
+```

 ### With Callbacks

@@ -63,6 +63,11 @@ type TreeManager struct {

 	// file is the open file handle for appending entries. Nil for in-memory.
 	file *os.File
+
+	// writer is a buffered writer wrapping file. Writes go through this
+	// buffer and are flushed to disk at explicit sync points (after each
+	// public Append* call, in Close, etc.) to reduce syscall overhead.
+	writer *bufio.Writer
 }

 // --- Constructors ---
@@ -105,11 +110,16 @@ func CreateTreeSession(cwd string) (*TreeManager, error) {
 		return nil, fmt.Errorf("failed to create session file: %w", err)
 	}
 	tm.file = f
+	tm.writer = bufio.NewWriter(f)

 	if err := tm.writeEntry(&header); err != nil {
 		_ = f.Close()
 		return nil, fmt.Errorf("failed to write session header: %w", err)
 	}
+	if err := tm.flushLocked(); err != nil {
+		_ = f.Close()
+		return nil, fmt.Errorf("failed to flush session header: %w", err)
+	}

 	return tm, nil
 }
@@ -150,6 +160,7 @@ func (tm *TreeManager) ForkToNewSession(cwd string, targetID string) (*TreeManag
 		return nil, fmt.Errorf("failed to recreate session file: %w", err)
 	}
 	newTm.file = f
+	newTm.writer = bufio.NewWriter(f)

 	if err := newTm.writeEntry(&newTm.header); err != nil {
 		_ = f.Close()
@@ -289,6 +300,12 @@ func (tm *TreeManager) ForkToNewSession(cwd string, targetID string) (*TreeManag
 		}
 	}

+	// Flush all buffered writes from the fork in a single syscall.
+	if err := newTm.flushLocked(); err != nil {
+		_ = f.Close()
+		return nil, fmt.Errorf("failed to flush forked session: %w", err)
+	}
+
 	// Set the leaf to the last entry in the new session.
 	newTm.leafID = prevNewID

@@ -374,6 +391,7 @@ func OpenTreeSession(path string) (*TreeManager, error) {
 		return nil, fmt.Errorf("failed to open session file for append: %w", err)
 	}
 	tm.file = f
+	tm.writer = bufio.NewWriter(f)

 	return tm, nil
 }
@@ -427,6 +445,9 @@ func (tm *TreeManager) AppendMessage(msg message.Message) (string, error) {
 	if err := tm.appendAndPersist(entry); err != nil {
 		return "", err
 	}
+	if err := tm.flushLocked(); err != nil {
+		return "", fmt.Errorf("failed to flush message: %w", err)
+	}

 	tm.leafID = entry.ID
 	return entry.ID, nil
@@ -451,6 +472,9 @@ func (tm *TreeManager) AppendModelChange(provider, modelID string) (string, erro
 	if err := tm.appendAndPersist(entry); err != nil {
 		return "", err
 	}
+	if err := tm.flushLocked(); err != nil {
+		return "", fmt.Errorf("failed to flush model change: %w", err)
+	}

 	tm.leafID = entry.ID
 	return entry.ID, nil
@@ -465,6 +489,9 @@ func (tm *TreeManager) AppendBranchSummary(fromID, summary string) (string, erro
 	if err := tm.appendAndPersist(entry); err != nil {
 		return "", err
 	}
+	if err := tm.flushLocked(); err != nil {
+		return "", fmt.Errorf("failed to flush branch summary: %w", err)
+	}

 	tm.leafID = entry.ID
 	return entry.ID, nil
@@ -479,6 +506,9 @@ func (tm *TreeManager) AppendLabel(targetID, label string) (string, error) {
 	if err := tm.appendAndPersist(entry); err != nil {
 		return "", err
 	}
+	if err := tm.flushLocked(); err != nil {
+		return "", fmt.Errorf("failed to flush label: %w", err)
+	}

 	tm.labels[targetID] = label
 	tm.leafID = entry.ID
@@ -494,6 +524,9 @@ func (tm *TreeManager) AppendSessionInfo(name string) (string, error) {
 	if err := tm.appendAndPersist(entry); err != nil {
 		return "", err
 	}
+	if err := tm.flushLocked(); err != nil {
+		return "", fmt.Errorf("failed to flush session info: %w", err)
+	}

 	tm.sessionName = name
 	tm.leafID = entry.ID
@@ -510,6 +543,9 @@ func (tm *TreeManager) AppendExtensionData(extType, data string) (string, error)
 	if err := tm.appendAndPersist(entry); err != nil {
 		return "", err
 	}
+	if err := tm.flushLocked(); err != nil {
+		return "", fmt.Errorf("failed to flush extension data: %w", err)
+	}

 	tm.leafID = entry.ID
 	return entry.ID, nil
@@ -541,6 +577,9 @@ func (tm *TreeManager) AppendCompaction(summary, firstKeptEntryID string, tokens
 	if err := tm.appendAndPersist(entry); err != nil {
 		return "", err
 	}
+	if err := tm.flushLocked(); err != nil {
+		return "", fmt.Errorf("failed to flush compaction: %w", err)
+	}

 	tm.leafID = entry.ID
 	return entry.ID, nil
@@ -926,11 +965,31 @@ func (tm *TreeManager) IsEmpty() bool {
 	return tm.MessageCount() == 0
 }

-// Close closes the underlying file handle.
+// Flush writes any data held in the buffered writer to the underlying file.
+// It holds tm.mu for the duration of the call and returns the error reported
+// by the flush, or nil when no buffered writer is attached.
+func (tm *TreeManager) Flush() error {
+	tm.mu.Lock()
+	defer tm.mu.Unlock()
+	return tm.flushLocked()
+}
+
+// flushLocked drains the buffered writer into the file and reports any
+// write error. It is a no-op when no writer is attached. The caller must
+// already hold tm.mu. Only the bufio buffer is emptied here; no fsync is
+// issued.
+func (tm *TreeManager) flushLocked() error {
+	if tm.writer == nil {
+		return nil
+	}
+	return tm.writer.Flush()
+}
+
+// Close flushes any buffered writes and closes the underlying file handle.
 func (tm *TreeManager) Close() error {
 	tm.mu.Lock()
 	defer tm.mu.Unlock()
 	if tm.file != nil {
+		// Flush buffered data before closing.
+		if tm.writer != nil {
+			_ = tm.writer.Flush()
+			tm.writer = nil
+		}
 		err := tm.file.Close()
 		tm.file = nil
 		return err
@@ -1090,13 +1149,22 @@ func (tm *TreeManager) GetLastCompaction() *CompactionEntry {

 // AddLLMMessages appends multiple LLM messages as entries. This is
 // used when syncing from the agent's ConversationMessages after a step.
+// All entries are buffered and flushed to disk in a single batch.
 func (tm *TreeManager) AddLLMMessages(msgs []fantasy.Message) error {
+	tm.mu.Lock()
+	defer tm.mu.Unlock()
+
 	for _, msg := range msgs {
-		if _, err := tm.AppendLLMMessage(msg); err != nil {
+		entry, err := NewMessageEntry(tm.leafID, message.FromLLMMessage(msg))
+		if err != nil {
 			return err
 		}
+		if err := tm.appendAndPersist(entry); err != nil {
+			return err
+		}
+		tm.leafID = entry.ID
 	}
-	return nil
+	return tm.flushLocked()
 }

 // Deprecated: Use AddLLMMessages instead.
@@ -1148,12 +1216,20 @@ func (tm *TreeManager) appendAndPersist(entry any) error {
 	return nil
 }

-// writeEntry serializes an entry and appends it as a line to the file.
+// writeEntry serializes an entry and appends it to the buffered writer.
+// The data is not flushed to disk until flushLocked is called.
 func (tm *TreeManager) writeEntry(entry any) error {
 	data, err := json.Marshal(entry)
 	if err != nil {
 		return fmt.Errorf("failed to marshal entry: %w", err)
 	}
+	if tm.writer != nil {
+		if _, err := tm.writer.Write(data); err != nil {
+			return err
+		}
+		return tm.writer.WriteByte('\n')
+	}
+	// Fallback for direct file writes (shouldn't happen in normal flow).
 	data = append(data, '\n')
 	_, err = tm.file.Write(data)
 	return err
@@ -6,6 +6,8 @@ import (
 	"path/filepath"
 	"sort"
 	"strings"
+	"sync"
+	"time"
 )

 // FileSuggestion represents a single file, directory, or MCP resource
@@ -31,6 +33,51 @@ type FileSuggestion struct {
 // maxFileSuggestions is the maximum number of file suggestions returned.
 const maxFileSuggestions = 20

+// fileListCache is a single-entry cache of the most recent listFiles()
+// result. It remembers one (searchDir, cwd) pair at a time, which avoids
+// re-running git subprocesses on every keystroke during @file completion.
+var fileListCache struct {
+	mu       sync.Mutex
+	dir      string           // searchDir that produced the cached entries
+	cwd      string           // cwd used for the git query
+	entries  []FileSuggestion // cached file list
+	expireAt time.Time        // when the cache entry expires
+}
+
+// fileListCacheTTL controls how long a cached file list stays valid.
+// During rapid typing the list is reused; after the TTL a fresh git
+// ls-files is executed so newly created files become visible.
+const fileListCacheTTL = 3 * time.Second
+
+// getCachedFileList returns the file list for searchDir, serving it from a
+// short-lived single-entry cache when the previous call used the same
+// searchDir and cwd and the entry has not expired. Otherwise it calls
+// listFiles and stores the result for fileListCacheTTL.
+//
+// The returned slice is always a fresh copy, so callers may reorder or
+// overwrite its elements without touching the cached list. The cache lock is
+// held across the listFiles call, so concurrent callers wait for one lookup
+// instead of each starting their own.
+func getCachedFileList(searchDir, cwd string) []FileSuggestion {
+	fileListCache.mu.Lock()
+	defer fileListCache.mu.Unlock()
+
+	hit := fileListCache.dir == searchDir &&
+		fileListCache.cwd == cwd &&
+		time.Now().Before(fileListCache.expireAt)
+	if !hit {
+		// Cache miss or expired — run the real (potentially expensive) lookup.
+		fileListCache.entries = listFiles(searchDir, cwd)
+		fileListCache.dir = searchDir
+		fileListCache.cwd = cwd
+		// Start the TTL only once the lookup has finished. Measuring from
+		// before the call would let a lookup slower than the TTL store an
+		// entry that is already expired, so the next keystroke would pay
+		// for the slow lookup again.
+		fileListCache.expireAt = time.Now().Add(fileListCacheTTL)
+	}
+
+	// Hand out a copy so callers can mutate (e.g. prepend baseDir).
+	cp := make([]FileSuggestion, len(fileListCache.entries))
+	copy(cp, fileListCache.entries)
+	return cp
+}
+
 // ExtractAtPrefix checks the current line for an @-file trigger at cursorCol.
 // It returns:
 //   - hasAt: true if a valid @ trigger was found
@@ -99,7 +146,7 @@ func GetFileSuggestions(prefix string, cwd string) []FileSuggestion {
 		}
 	}

-	files := listFiles(searchDir, cwd)
+	files := getCachedFileList(searchDir, cwd)
 	if len(files) == 0 {
 		return nil
 	}
@@ -109,8 +109,8 @@ func (m *TextMessageItem) renderContent(width int) string {
 // It accumulates content chunks and re-renders on each update for live display.
 type StreamingMessageItem struct {
 	id            string
-	role          string // "assistant" or "reasoning"
-	content       string // Accumulated streaming content
+	role          string          // "assistant" or "reasoning"
+	content       strings.Builder // Accumulated streaming content
 	timestamp     time.Time
 	startTime     time.Time // When streaming started (for live duration counter)
 	modelName     string
@@ -156,10 +156,10 @@ func (s *StreamingMessageItem) Render(width int) string {
 			durationMs = time.Since(s.startTime).Milliseconds()
 		}
 		ty := createTypography(style.GetTheme())
-		rendered = render.ReasoningBlock(s.content, durationMs, width, ty, style.GetTheme())
+		rendered = render.ReasoningBlock(s.content.String(), durationMs, width, ty, style.GetTheme())
 	} else {
 		// Render as assistant message
-		rendered = render.AssistantBlock(s.content, width, style.GetTheme())
+		rendered = render.AssistantBlock(s.content.String(), width, style.GetTheme())
 	}

 	// Cache and return (but reasoning is never cached due to live duration)
@@ -187,7 +187,7 @@ func (s *StreamingMessageItem) Height() int {

 // AppendChunk adds a content chunk and invalidates the render cache.
 func (s *StreamingMessageItem) AppendChunk(chunk string) {
-	s.content += chunk
+	s.content.WriteString(chunk)
 	s.cachedWidth = 0 // Invalidate cache
 }

@@ -243,9 +243,7 @@ func (m *StreamingBashOutputItem) Render(width int) string {

 	// Header with command
 	if m.command != "" {
-		headerStyle := lipgloss.NewStyle().
-			Foreground(theme.Muted).
-			Italic(true)
+		headerStyle := style.GetCachedStyles().BashHeader
 		parts = append(parts, headerStyle.Render(fmt.Sprintf("▸ %s", m.command)))
 	}

@@ -338,7 +338,7 @@ func (r *MessageRenderer) RenderToolMessage(toolName, toolArgs, toolResult strin
 	// Build the content: icon + name + params on first line, then body
 	headerLine := styledIcon + " " + styledName
 	if params != "" {
-		headerLine += " " + lipgloss.NewStyle().Foreground(theme.Muted).Render(params)
+		headerLine += " " + style.GetCachedStyles().ToolMuted.Render(params)
 	}

 	// Get body content
@@ -45,7 +45,7 @@ func UserBlock(content string, width int, ty *herald.Typography, theme style.The
 // HighlightFileTokens wraps @file tokens in the given text with the theme
 // accent color so they stand out visually in rendered user messages.
 func HighlightFileTokens(text string, theme style.Theme) string {
-	accentStyle := lipgloss.NewStyle().Foreground(theme.Accent).Bold(true)
+	accentStyle := style.GetCachedStyles().FileTokenAccent
 	return fileTokenPattern.ReplaceAllStringFunc(text, func(token string) string {
 		return accentStyle.Render(token)
 	})
@@ -75,8 +75,8 @@ func ReasoningBlock(content string, duration int64, width int, ty *herald.Typogr
 	if width > 4 {
 		contentStr = wrapText(contentStr, width-4)
 	}
-	mutedStyle := lipgloss.NewStyle().Foreground(theme.Muted)
-	contentRendered := mutedStyle.Render(ty.Italic(contentStr))
+	cs := style.GetCachedStyles()
+	contentRendered := cs.Muted.Render(ty.Italic(contentStr))

 	// Build label based on duration
 	if duration > 0 {
@@ -86,14 +86,14 @@ func ReasoningBlock(content string, duration int64, width int, ty *herald.Typogr
 		} else {
 			durationStr = fmt.Sprintf("%.1fs", float64(duration)/1000)
 		}
-		labelPart := lipgloss.NewStyle().Foreground(theme.VeryMuted).Render("Thought for ")
-		durationPart := lipgloss.NewStyle().Foreground(theme.Accent).Render(durationStr)
+		labelPart := cs.VeryMuted.Render("Thought for ")
+		durationPart := cs.Accent.Render(durationStr)
 		label := labelPart + durationPart
 		rendered := contentRendered + "\n" + label
 		return styleMarginBottom(theme, rendered)
 	}

-	label := lipgloss.NewStyle().Foreground(theme.VeryMuted).Render("Thought")
+	label := cs.VeryMuted.Render("Thought")
 	rendered := contentRendered + "\n" + label

 	return styleMarginBottom(theme, rendered)
@@ -194,7 +194,7 @@ func ToolBlock(displayName, params, body string, isError bool, width int, ty *he

 // styleMarginBottom applies a 1-line margin bottom using the theme.
 func styleMarginBottom(theme style.Theme, content string) string {
-	return lipgloss.NewStyle().MarginBottom(1).Render(content)
+	return style.GetCachedStyles().MarginBottom1.Render(content)
 }

 // wrapText soft-wraps a string to the given width using lipgloss, which is
@@ -21,12 +21,11 @@ func knightRiderFrames() []string {
 	const numDots = 8
 	const dot = "▪"

-	theme := style.GetTheme()
-
-	bright := lipgloss.NewStyle().Foreground(theme.Primary)
-	med := lipgloss.NewStyle().Foreground(theme.Muted)
-	dim := lipgloss.NewStyle().Foreground(theme.VeryMuted)
-	off := lipgloss.NewStyle().Foreground(theme.MutedBorder)
+	cs := style.GetCachedStyles()
+	bright := cs.SpinnerBright
+	med := cs.SpinnerMed
+	dim := cs.SpinnerDim
+	off := cs.SpinnerOff

 	// Scanner bounces: 0→7→0
 	positions := make([]int, 0, 2*numDots-2)
@@ -476,9 +475,8 @@ func (s *StreamComponent) renderReasoningBlock(reasoning string) string {
 	if s.width > 4 {
 		content = lipgloss.NewStyle().Width(s.width - 4).Render(content)
 	}
-	theme := GetTheme()
-	mutedStyle := lipgloss.NewStyle().Foreground(theme.Muted)
-	parts = append(parts, mutedStyle.Render(s.ty.Italic(content)))
+	cs := style.GetCachedStyles()
+	parts = append(parts, cs.Muted.Render(s.ty.Italic(content)))

 	// Duration footer with VeryMuted label and Accent duration.
 	var duration time.Duration
@@ -494,8 +492,8 @@ func (s *StreamComponent) renderReasoningBlock(reasoning string) string {
 		} else {
 			durationStr = fmt.Sprintf("%.1fs", duration.Seconds())
 		}
-		label := lipgloss.NewStyle().Foreground(theme.VeryMuted).Render("Thought for ")
-		durationStyled := lipgloss.NewStyle().Foreground(theme.Accent).Render(durationStr)
+		label := cs.VeryMuted.Render("Thought for ")
+		durationStyled := cs.Accent.Render(durationStr)
 		parts = append(parts, label+durationStyled)
 	}

@@ -40,6 +40,70 @@ func GetTheme() Theme {
 func SetTheme(theme Theme) {
 	currentTheme = theme
 	markdownTypographyCache = nil // invalidate cached renderer; colors may have changed
+	styleCache = nil              // invalidate cached styles; colors may have changed
+}
+
+// CachedStyles holds pre-built lipgloss styles that are reused across
+// render frames instead of being rebuilt with lipgloss.NewStyle() on each
+// call. The cache is dropped by SetTheme and lazily rebuilt on next access.
+// Only accessed from BubbleTea's single-threaded Update/View cycle.
+type CachedStyles struct {
+	// render/blocks.go
+	FileTokenAccent lipgloss.Style // Foreground(Accent).Bold(true)
+	Muted           lipgloss.Style // Foreground(Muted)
+	VeryMuted       lipgloss.Style // Foreground(VeryMuted)
+	Accent          lipgloss.Style // Foreground(Accent)
+	MarginBottom1   lipgloss.Style // MarginBottom(1); uses no theme color
+
+	// stream.go - spinner phases
+	SpinnerBright lipgloss.Style // Foreground(Primary)
+	SpinnerMed    lipgloss.Style // Foreground(Muted)
+	SpinnerDim    lipgloss.Style // Foreground(VeryMuted)
+	SpinnerOff    lipgloss.Style // Foreground(MutedBorder)
+
+	// message_items.go - bash output
+	BashHeader lipgloss.Style // Foreground(Muted).Italic(true)
+	BashStderr lipgloss.Style // Foreground(Error)
+
+	// render/blocks.go - tool block
+	ToolSuccess lipgloss.Style // Foreground(Success)
+	ToolError   lipgloss.Style // Foreground(Error)
+	ToolInfo    lipgloss.Style // Foreground(Info).Bold(true)
+	ToolMuted   lipgloss.Style // Foreground(Muted)
+
+	// common
+	ErrorFg  lipgloss.Style // Foreground(Error)
+	TextBold lipgloss.Style // Foreground(Text).Bold(true)
+}
+
+var styleCache *CachedStyles
+
+// GetCachedStyles hands back the shared style set, building it from the
+// current theme on first use and again after SetTheme has cleared it.
+func GetCachedStyles() *CachedStyles {
+	if styleCache == nil {
+		theme := GetTheme()
+		styleCache = &CachedStyles{
+			FileTokenAccent: lipgloss.NewStyle().Foreground(theme.Accent).Bold(true),
+			Muted:           lipgloss.NewStyle().Foreground(theme.Muted),
+			VeryMuted:       lipgloss.NewStyle().Foreground(theme.VeryMuted),
+			Accent:          lipgloss.NewStyle().Foreground(theme.Accent),
+			MarginBottom1:   lipgloss.NewStyle().MarginBottom(1),
+			SpinnerBright:   lipgloss.NewStyle().Foreground(theme.Primary),
+			SpinnerMed:      lipgloss.NewStyle().Foreground(theme.Muted),
+			SpinnerDim:      lipgloss.NewStyle().Foreground(theme.VeryMuted),
+			SpinnerOff:      lipgloss.NewStyle().Foreground(theme.MutedBorder),
+			BashHeader:      lipgloss.NewStyle().Foreground(theme.Muted).Italic(true),
+			BashStderr:      lipgloss.NewStyle().Foreground(theme.Error),
+			ToolSuccess:     lipgloss.NewStyle().Foreground(theme.Success),
+			ToolError:       lipgloss.NewStyle().Foreground(theme.Error),
+			ToolInfo:        lipgloss.NewStyle().Foreground(theme.Info).Bold(true),
+			ToolMuted:       lipgloss.NewStyle().Foreground(theme.Muted),
+			ErrorFg:         lipgloss.NewStyle().Foreground(theme.Error),
+			TextBold:        lipgloss.NewStyle().Foreground(theme.Text).Bold(true),
+		}
+	}
+	return styleCache
 }

 // MarkdownThemeColors defines colors for markdown rendering and syntax highlighting.
@@ -1781,12 +1781,19 @@ func (m *Kit) Subagent(ctx context.Context, cfg SubagentConfig) (*SubagentResult

 	// Create child Kit instance. Pass the parent's loaded MCP config to
 	// avoid re-reading viper (which races with concurrent subagent spawns).
+	// Streaming must be explicitly enabled — Options.Streaming defaults to
+	// false, and New() unconditionally writes viper.Set("stream", opts.Streaming).
+	// Without this, the subagent would (a) pollute viper global state for
+	// other concurrent callers and (b) potentially hit provider-level
+	// differences (e.g. Anthropic non-streaming timeouts with extended
+	// thinking).
 	childOpts := &Options{
 		Model:        model,
 		SystemPrompt: systemPrompt,
 		Tools:        tools,
 		NoSession:    cfg.NoSession,
 		Quiet:        true,
+		Streaming:    true,
 		MCPConfig:    m.mcpConfig,
 	}
 	child, err := New(ctx, childOpts)
@@ -2,6 +2,7 @@ package kit

 import (
 	"context"
+	"strings"

 	"charm.land/fantasy"

@@ -52,6 +53,22 @@ func ErrorResult(content string) ToolOutput {
 	return ToolOutput{Content: content, IsError: true}
 }

+// ImageResult creates a [ToolOutput] that returns an image to the LLM.
+// The data is the raw image bytes and mediaType is the MIME type
+// (e.g. "image/png", "image/jpeg"). The optional text content accompanies
+// the image and is visible to the LLM alongside it. IsError is left false
+// and mediaType is stored as given, without validation.
+func ImageResult(content string, data []byte, mediaType string) ToolOutput {
+	return ToolOutput{Content: content, Data: data, MediaType: mediaType}
+}
+
+// MediaResult creates a [ToolOutput] that returns non-image binary media
+// (e.g. audio, video) to the LLM. The data is the raw bytes and mediaType
+// is the MIME type (e.g. "audio/wav", "video/mp4"). The optional text
+// content accompanies the media.
+//
+// The returned value is built exactly like the one from [ImageResult]; the
+// image-versus-media distinction is made later from the MIME type prefix
+// when the output is converted to a tool response.
+func MediaResult(content string, data []byte, mediaType string) ToolOutput {
+	return ToolOutput{Content: content, Data: data, MediaType: mediaType}
+}
+
 // toolCallIDKey is the context key for the tool call ID.
 type toolCallIDKey struct{}

@@ -63,9 +80,35 @@ func ToolCallIDFromContext(ctx context.Context) string {
 	return s
 }

+// toolOutputToResponse converts a [ToolOutput] into the underlying
+// framework's ToolResponse, inferring the response Type from Data/MediaType
+// so that binary content (images, audio, etc.) is forwarded to the LLM
+// instead of being silently dropped.
+//
+// Type is set only when Data is non-empty and MediaType is non-empty:
+// "image" for image/* MIME types (matched case-insensitively), "media" for
+// everything else. Text-only outputs leave Type empty. Metadata, when
+// present, is attached via fantasy.WithResponseMetadata.
+func toolOutputToResponse(result ToolOutput) fantasy.ToolResponse {
+	resp := fantasy.ToolResponse{
+		Content:   result.Content,
+		IsError:   result.IsError,
+		Data:      result.Data,
+		MediaType: result.MediaType,
+	}
+	// Infer response type from binary data so the downstream framework
+	// creates a media content block instead of a plain-text one.
+	if len(result.Data) > 0 && result.MediaType != "" {
+		// MIME type names are case-insensitive, so "Image/PNG" must be
+		// classified as an image too. Only the comparison is lower-cased;
+		// resp.MediaType keeps the caller's spelling.
+		if strings.HasPrefix(strings.ToLower(result.MediaType), "image/") {
+			resp.Type = "image"
+		} else {
+			resp.Type = "media"
+		}
+	}
+	if result.Metadata != nil {
+		resp = fantasy.WithResponseMetadata(resp, result.Metadata)
+	}
+	return resp
+}
+
 // NewTool creates a custom [Tool] with automatic JSON schema generation from
 // the TInput struct type. The handler receives a typed input (deserialized
-// from the LLM's JSON arguments) and returns a [ToolResult].
+// from the LLM's JSON arguments) and returns a [ToolOutput].
 //
 // Struct tags on TInput control the generated schema:
 //
@@ -77,6 +120,11 @@ func ToolCallIDFromContext(ctx context.Context) string {
 // The tool call ID is injected into the context and can be retrieved with
 // [ToolCallIDFromContext].
 //
+// Binary results: When [ToolOutput.Data] and [ToolOutput.MediaType] are set,
+// the response type is automatically inferred so the LLM receives the binary
+// content (e.g. an image) instead of only the text. Use [ImageResult] or
+// [MediaResult] for convenience.
+//
 // Example:
 //
 //	type WeatherInput struct {
@@ -84,7 +132,7 @@ func ToolCallIDFromContext(ctx context.Context) string {
 //	}
 //
 //	tool := kit.NewTool("get_weather", "Get weather for a city",
-//	    func(ctx context.Context, input WeatherInput) (kit.ToolResult, error) {
+//	    func(ctx context.Context, input WeatherInput) (kit.ToolOutput, error) {
 //	        return kit.TextResult("72°F, sunny in " + input.City), nil
 //	    },
 //	)
@@ -96,16 +144,7 @@ func NewTool[TInput any](name, description string, fn func(ctx context.Context,
 			if err != nil {
 				return fantasy.NewTextErrorResponse(err.Error()), nil
 			}
-			resp := fantasy.ToolResponse{
-				Content:   result.Content,
-				IsError:   result.IsError,
-				Data:      result.Data,
-				MediaType: result.MediaType,
-			}
-			if result.Metadata != nil {
-				resp = fantasy.WithResponseMetadata(resp, result.Metadata)
-			}
-			return resp, nil
+			return toolOutputToResponse(result), nil
 		},
 	)
 }
@@ -121,16 +160,7 @@ func NewParallelTool[TInput any](name, description string, fn func(ctx context.C
 			if err != nil {
 				return fantasy.NewTextErrorResponse(err.Error()), nil
 			}
-			resp := fantasy.ToolResponse{
-				Content:   result.Content,
-				IsError:   result.IsError,
-				Data:      result.Data,
-				MediaType: result.MediaType,
-			}
-			if result.Metadata != nil {
-				resp = fantasy.WithResponseMetadata(resp, result.Metadata)
-			}
-			return resp, nil
+			return toolOutputToResponse(result), nil
 		},
 	)
 }
@@ -117,3 +117,149 @@ func TestToolOutput_BinaryData(t *testing.T) {
 		t.Errorf("MediaType = %q, want %q", r.MediaType, "image/png")
 	}
 }
+
+// TestImageResult verifies that the ImageResult convenience constructor
+// copies content, data and MIME type into the ToolOutput and leaves IsError
+// unset.
+func TestImageResult(t *testing.T) {
+	// First four bytes of the PNG signature; only the length is asserted.
+	data := []byte{0x89, 0x50, 0x4E, 0x47}
+	r := kit.ImageResult("here is the image", data, "image/png")
+	if r.Content != "here is the image" {
+		t.Errorf("Content = %q, want %q", r.Content, "here is the image")
+	}
+	if len(r.Data) != 4 {
+		t.Errorf("Data len = %d, want 4", len(r.Data))
+	}
+	if r.MediaType != "image/png" {
+		t.Errorf("MediaType = %q, want %q", r.MediaType, "image/png")
+	}
+	if r.IsError {
+		t.Error("ImageResult should not set IsError")
+	}
+}
+
+// TestMediaResult verifies that the MediaResult convenience constructor
+// copies content, data and MIME type into the ToolOutput and leaves IsError
+// unset.
+func TestMediaResult(t *testing.T) {
+	// Arbitrary four-byte payload standing in for audio data; only the
+	// length is asserted.
+	data := []byte{0xFF, 0xFB, 0x90, 0x00}
+	r := kit.MediaResult("audio clip", data, "audio/mpeg")
+	if r.Content != "audio clip" {
+		t.Errorf("Content = %q, want %q", r.Content, "audio clip")
+	}
+	if len(r.Data) != 4 {
+		t.Errorf("Data len = %d, want 4", len(r.Data))
+	}
+	if r.MediaType != "audio/mpeg" {
+		t.Errorf("MediaType = %q, want %q", r.MediaType, "audio/mpeg")
+	}
+	if r.IsError {
+		t.Error("MediaResult should not set IsError")
+	}
+}
+
+// TestNewTool_BinaryImageResponse verifies that NewTool correctly infers the
+// response type for image data so binary content is forwarded to the LLM
+// (issue #17). It also checks that Data, MediaType and Content pass through
+// unchanged.
+func TestNewTool_BinaryImageResponse(t *testing.T) {
+	type Input struct {
+		Path string `json:"path"`
+	}
+
+	imgData := []byte{0x89, 0x50, 0x4E, 0x47} // PNG magic bytes
+
+	tool := kit.NewTool("read_image", "Read an image file",
+		func(ctx context.Context, input Input) (kit.ToolOutput, error) {
+			return kit.ImageResult("Here is the image", imgData, "image/png"), nil
+		},
+	)
+
+	// Run the tool and inspect the raw ToolResponse via the AgentTool interface.
+	resp, err := tool.Run(context.Background(), kit.LLMToolCall{
+		ID:    "call_1",
+		Name:  "read_image",
+		Input: `{"path": "test.png"}`,
+	})
+	if err != nil {
+		t.Fatalf("Run() error: %v", err)
+	}
+
+	// The Type field must be "image" so the downstream framework creates a
+	// media content block instead of discarding the binary data.
+	if resp.Type != "image" {
+		t.Errorf("ToolResponse.Type = %q, want %q", resp.Type, "image")
+	}
+	if len(resp.Data) != 4 {
+		t.Errorf("ToolResponse.Data len = %d, want 4", len(resp.Data))
+	}
+	if resp.MediaType != "image/png" {
+		t.Errorf("ToolResponse.MediaType = %q, want %q", resp.MediaType, "image/png")
+	}
+	if resp.Content != "Here is the image" {
+		t.Errorf("ToolResponse.Content = %q, want %q", resp.Content, "Here is the image")
+	}
+}
+
+// TestNewTool_BinaryMediaResponse verifies type inference for non-image
+// media: an "audio/mpeg" payload must produce ToolResponse.Type "media".
+func TestNewTool_BinaryMediaResponse(t *testing.T) {
+	type Input struct{}
+
+	tool := kit.NewTool("get_audio", "Get audio",
+		func(ctx context.Context, input Input) (kit.ToolOutput, error) {
+			return kit.MediaResult("audio clip", []byte{0xFF, 0xFB}, "audio/mpeg"), nil
+		},
+	)
+
+	resp, err := tool.Run(context.Background(), kit.LLMToolCall{
+		ID:    "call_2",
+		Name:  "get_audio",
+		Input: `{}`,
+	})
+	if err != nil {
+		t.Fatalf("Run() error: %v", err)
+	}
+	if resp.Type != "media" {
+		t.Errorf("ToolResponse.Type = %q, want %q", resp.Type, "media")
+	}
+}
+
+// TestNewTool_TextResponseTypeNotSet verifies that text-only responses do NOT
+// get an inferred type (preserving existing behavior): a TextResult carries
+// no Data or MediaType, so Type must stay empty.
+func TestNewTool_TextResponseTypeNotSet(t *testing.T) {
+	type Input struct{}
+
+	tool := kit.NewTool("echo", "Echo",
+		func(ctx context.Context, input Input) (kit.ToolOutput, error) {
+			return kit.TextResult("hello"), nil
+		},
+	)
+
+	resp, err := tool.Run(context.Background(), kit.LLMToolCall{
+		ID: "call_3", Name: "echo", Input: `{}`,
+	})
+	if err != nil {
+		t.Fatalf("Run() error: %v", err)
+	}
+	// Text responses should not have Type set (the framework treats "" as text).
+	if resp.Type != "" {
+		t.Errorf("ToolResponse.Type = %q, want empty string for text responses", resp.Type)
+	}
+}
+
+// TestNewParallelTool_BinaryImageResponse mirrors the NewTool binary test for
+// NewParallelTool: an "image/jpeg" payload must produce ToolResponse.Type
+// "image".
+func TestNewParallelTool_BinaryImageResponse(t *testing.T) {
+	type Input struct{}
+
+	tool := kit.NewParallelTool("snap", "Take a snapshot",
+		func(ctx context.Context, input Input) (kit.ToolOutput, error) {
+			return kit.ImageResult("snapshot", []byte{0xFF, 0xD8}, "image/jpeg"), nil
+		},
+	)
+
+	resp, err := tool.Run(context.Background(), kit.LLMToolCall{
+		ID: "call_4", Name: "snap", Input: `{}`,
+	})
+	if err != nil {
+		t.Fatalf("Run() error: %v", err)
+	}
+	if resp.Type != "image" {
+		t.Errorf("ToolResponse.Type = %q, want %q", resp.Type, "image")
+	}
+}
@@ -157,6 +157,18 @@ type LLMTextPart = fantasy.TextPart
 // LLMReasoningPart is a reasoning/chain-of-thought content part.
 type LLMReasoningPart = fantasy.ReasoningPart

+// LLMToolCall represents the raw tool invocation passed to a [Tool]'s Run
+// method. It carries the call ID, the tool name, and the JSON-encoded input
+// arguments from the LLM as a string. This is the execution-layer call
+// object — distinct from [ToolCall] (a message content part).
+type LLMToolCall = fantasy.ToolCall
+
+// LLMToolResponse represents the raw response returned from a [Tool]'s Run
+// method. Most SDK consumers should return a [ToolOutput] from [NewTool] /
+// [NewParallelTool] handlers instead — this alias is provided for advanced
+// use cases that need to call Tool.Run() directly (e.g. testing).
+type LLMToolResponse = fantasy.ToolResponse
+
 // LLMToolCallPart represents an LLM-initiated tool invocation within a message.
 type LLMToolCallPart = fantasy.ToolCallPart

@@ -493,6 +493,8 @@ host, _ := kit.New(ctx, &kit.Options{
 |----------|-------------|
 | `kit.TextResult(content)` | Successful text result |
 | `kit.ErrorResult(content)` | Error result (LLM sees it as a tool error) |
+| `kit.ImageResult(content, data, mediaType)` | Image result with binary data (e.g. `"image/png"`) |
+| `kit.MediaResult(content, data, mediaType)` | Non-image media result (e.g. `"audio/mpeg"`) |

 **ToolOutput fields** (for advanced use):

@@ -1095,6 +1097,8 @@ kit.LLMUsage        // {InputTokens, OutputTokens, TotalTokens, ReasoningTokens,
                     //  CacheCreationTokens, CacheReadTokens}
 kit.LLMResponse     // {Content, FinishReason, Usage}
 kit.LLMFilePart     // {Filename, Data []byte, MediaType}
+kit.LLMToolCall     // {ID, Name, Input string} — execution-layer tool call (for Tool.Run)
+kit.LLMToolResponse // {Type, Content, Data, MediaType, IsError, ...} — raw tool response

 // Compaction types
 kit.CompactionResult, kit.CompactionOptions
@@ -101,8 +101,10 @@ Return values:
 |--------|-------------|
 | `kit.TextResult(s)` | Successful text result |
 | `kit.ErrorResult(s)` | Error result (LLM sees it as a tool error) |
+| `kit.ImageResult(s, data, mediaType)` | Image result with binary data (e.g. `"image/png"`) |
+| `kit.MediaResult(s, data, mediaType)` | Non-image media result (e.g. `"audio/mpeg"`) |

-For advanced use, return a `kit.ToolOutput` struct directly with `Data`, `MediaType`, and `Metadata` fields.
+Binary data (images, audio, etc.) in `ToolOutput.Data` is automatically forwarded to the LLM when `MediaType` is set. For advanced use, return a `kit.ToolOutput` struct directly with `Data`, `MediaType`, and `Metadata` fields.

 Use `kit.NewParallelTool` for tools that are safe to run concurrently. Use `kit.ToolCallIDFromContext(ctx)` to retrieve the LLM-assigned call ID for logging or tracing.