From d1cffb85eff870e736eaafaf5af1e89dedd458e8 Mon Sep 17 00:00:00 2001 From: Ed Zynda Date: Tue, 24 Mar 2026 15:13:35 +0300 Subject: [PATCH] fix: prevent bash tool from hanging on long-running/background processes - Use process group isolation (Setpgid) so the entire process tree is killed on timeout/cancellation, not just the direct child - Set cmd.Cancel to kill the process group (-pgid) with SIGKILL - Set cmd.WaitDelay (500ms grace period) to force-close pipes when grandchild processes hold them open after the direct child exits - Convert buffered path from cmd.Run() to explicit pipes + cmd.Start() + cmd.Wait() so WaitDelay can properly force-close pipe handles - Reorder streaming path: cmd.Wait() before wg.Wait() so the WaitDelay timer starts when the child exits, not after pipes close - Add mutex for thread-safe chunk collection in streaming mode - Add comprehensive tests for timeout, background processes, context cancellation, and both buffered/streaming paths --- internal/core/bash.go | 71 +++++++++++++++----- internal/core/bash_test.go | 129 +++++++++++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+), 16 deletions(-) create mode 100644 internal/core/bash_test.go diff --git a/internal/core/bash.go b/internal/core/bash.go index e99d69b9..f948e256 100644 --- a/internal/core/bash.go +++ b/internal/core/bash.go @@ -136,16 +136,54 @@ func executeBash(ctx context.Context, call fantasy.ToolCall, workDir string) (fa } // executeBashBuffered collects all output before returning (original behavior). +// It uses explicit pipes (not cmd.Stdout) so that cmd.WaitDelay can forcibly +// close them when grandchild processes hold pipe handles open after the +// direct child exits. func executeBashBuffered(cmdCtx context.Context, call fantasy.ToolCall, cmd *exec.Cmd) (fantasy.ToolResponse, error) { - var stdout, stderr strings.Builder - cmd.Stdout = &stdout - cmd.Stderr = &stderr + stdoutPipe, err := cmd.StdoutPipe() + if err != nil { + return fantasy.NewTextErrorResponse("failed to create stdout pipe"), nil + } + stderrPipe, err := cmd.StderrPipe() + if err != nil { + return fantasy.NewTextErrorResponse("failed to create stderr pipe"), nil + } - err := cmd.Run() + if err := cmd.Start(); err != nil { + return fantasy.NewTextErrorResponse(fmt.Sprintf("failed to start command: %v", err)), nil + } + + // Read pipes concurrently + var wg sync.WaitGroup + var stdout, stderr strings.Builder + var stdoutErr, stderrErr error + + wg.Add(2) + go func() { + defer wg.Done() + _, stdoutErr = io.Copy(&stdout, stdoutPipe) + }() + go func() { + defer wg.Done() + _, stderrErr = io.Copy(&stderr, stderrPipe) + }() + + // Wait for the process to exit first. cmd.WaitDelay ensures that if + // pipes remain open (held by grandchild processes), they'll be forcibly + // closed after the grace period, which unblocks the io.Copy goroutines. + waitErr := cmd.Wait() + + // Wait for pipe readers to finish draining. + wg.Wait() + + // Ignore pipe read errors caused by WaitDelay force-closing — + // we still have whatever was read before the close. + _ = stdoutErr + _ = stderrErr exitCode := 0 - if err != nil { - if exitErr, ok := err.(*exec.ExitError); ok { + if waitErr != nil { + if exitErr, ok := waitErr.(*exec.ExitError); ok { exitCode = exitErr.ExitCode() } else if cmdCtx.Err() == context.DeadlineExceeded { return fantasy.NewTextErrorResponse("command timed out"), nil @@ -173,6 +211,7 @@ func executeBashStreaming(cmdCtx context.Context, call fantasy.ToolCall, cmd *ex // Stream stdout and stderr concurrently var wg sync.WaitGroup + var mu sync.Mutex var stdoutChunks, stderrChunks []string streamOutput := func(reader io.Reader, isStderr bool) { @@ -187,17 +226,13 @@ func executeBashStreaming(cmdCtx context.Context, call fantasy.ToolCall, cmd *ex // Send chunk to UI outputCallback(call.ID, "bash", chunk, isStderr) // Collect for final result + mu.Lock() if isStderr { stderrChunks = append(stderrChunks, chunk) } else { stdoutChunks = append(stdoutChunks, chunk) } - // Check if context was cancelled - select { - case <-cmdCtx.Done(): - return - default: - } + mu.Unlock() } } @@ -205,12 +240,16 @@ func executeBashStreaming(cmdCtx context.Context, call fantasy.ToolCall, cmd *ex go streamOutput(stdoutPipe, false) go streamOutput(stderrPipe, true) - // Wait for both streams to complete - wg.Wait() - - // Wait for command to finish + // Wait for the process to exit. cmd.WaitDelay ensures that if pipes + // remain open (held by grandchild processes), they'll be forcibly closed + // after the grace period, which unblocks the scanners above. err = cmd.Wait() + // Wait for the pipe readers to finish draining. This will complete + // quickly since cmd.Wait() (with WaitDelay) has already ensured + // the pipes are closed. + wg.Wait() + exitCode := 0 if err != nil { if exitErr, ok := err.(*exec.ExitError); ok { diff --git a/internal/core/bash_test.go b/internal/core/bash_test.go new file mode 100644 index 00000000..86ab4155 --- /dev/null +++ b/internal/core/bash_test.go @@ -0,0 +1,129 @@ +package core + +import ( + "context" + "encoding/json" + "testing" + "time" + + "charm.land/fantasy" +) + +// helper to create a bash tool call with the given command and optional timeout. +func bashCall(command string, timeout float64) fantasy.ToolCall { + args := map[string]any{"command": command} + if timeout > 0 { + args["timeout"] = timeout + } + input, _ := json.Marshal(args) + return fantasy.ToolCall{ + ID: "test-call", + Name: "bash", + Input: string(input), + } +} + +func TestBash_SimpleCommand(t *testing.T) { + resp, err := executeBash(context.Background(), bashCall("echo hello", 0), "") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if resp.IsError { + t.Fatalf("expected success, got error: %s", resp.Content) + } + if resp.Content != "hello\n" { + t.Errorf("expected 'hello\\n', got %q", resp.Content) + } +} + +func TestBash_TimeoutKillsProcess(t *testing.T) { + start := time.Now() + resp, err := executeBash(context.Background(), bashCall("sleep 60", 2), "") + elapsed := time.Since(start) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !resp.IsError { + t.Fatal("expected error response for timed-out command") + } + if elapsed > 10*time.Second { + t.Errorf("command took %v, expected ~2s timeout", elapsed) + } +} + +func TestBash_BackgroundProcessDoesNotHang(t *testing.T) { + // This command spawns a background sleep that would hold pipes open + // forever if we didn't have process group killing + WaitDelay. + start := time.Now() + resp, err := executeBash(context.Background(), bashCall("echo done; sleep 3600 &", 5), "") + elapsed := time.Since(start) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // The foreground command (echo) should complete quickly + if elapsed > 5*time.Second { + t.Errorf("command took %v, should complete in <5s (background process should not block)", elapsed) + } + if resp.IsError { + t.Fatalf("expected success, got error: %s", resp.Content) + } +} + +func TestBash_BackgroundProcessDoesNotHang_Streaming(t *testing.T) { + // Same test but in streaming mode (with output callback). + ctx := ContextWithToolOutputCallback(context.Background(), func(_, _, _ string, _ bool) {}) + start := time.Now() + resp, err := executeBash(ctx, bashCall("echo streaming; sleep 3600 &", 5), "") + elapsed := time.Since(start) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if elapsed > 5*time.Second { + t.Errorf("streaming command took %v, should complete in <5s", elapsed) + } + if resp.IsError { + t.Fatalf("expected success, got error: %s", resp.Content) + } +} + +func TestBash_ContextCancellation(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + + done := make(chan struct{}) + go func() { + defer close(done) + _, _ = executeBash(ctx, bashCall("sleep 60", 0), "") + }() + + // Cancel after a short delay + time.Sleep(500 * time.Millisecond) + cancel() + + // Should return promptly after cancellation + select { + case <-done: + // success + case <-time.After(5 * time.Second): + t.Fatal("executeBash did not return after context cancellation") + } +} + +func TestBash_BannedCommand(t *testing.T) { + resp, err := executeBash(context.Background(), bashCall("alias foo=bar", 0), "") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !resp.IsError { + t.Fatal("expected error for banned command") + } +} + +func TestBash_EmptyCommand(t *testing.T) { + resp, err := executeBash(context.Background(), bashCall("", 0), "") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !resp.IsError { + t.Fatal("expected error for empty command") + } +}