Files
kit/internal/models/rightsize_test.go
Ed Zynda 3bb20f5283 feat(models): surface and prevent silent max-tokens truncation
- Raise --max-tokens default from 4096 to 8192.
- Auto-raise MaxTokens toward the model's catalog Limit.Output (capped at
  32768) when the user hasn't set --max-tokens explicitly and no per-model
  modelSettings override applied. Prevents silent 4k/8k truncation on
  models that support 32k-262k output.
- Surface FinishReasonLength at turn end: the app now subscribes to
  TurnEndEvent and renders a system-message banner explaining the current
  cap, the model's known ceiling, and how to raise it. Previously the TUI
  swallowed 'length' stops, producing 'ghost' truncations.
- Export FinishReason* constants on pkg/kit (Stop, Length, ToolCalls,
  ContentFilter, Error, Other, Unknown) and fix stale comments that used
  Anthropic-style strings.
- Add Kit.MaxTokens() and Kit.MaxOutputLimit() SDK accessors, backed by
  Agent.GetMaxTokens() which correctly returns 0 for providers that
  suppress the param (e.g. Codex OAuth).
- Tests: rightSizeMaxTokens covers 7 paths (cap, raise, preserve,
  explicit flag, nil info, zero limit); handleTurnEnd covers length/
  non-length/nil-sendFn and the fallback message formatter.
- Docs: update configuration.md, cli/flags.md, and kit-extensions skill
  to reflect the new default and behavior.
2026-04-16 23:12:10 +03:00

149 lines
4.1 KiB
Go

package models
import (
"testing"
"github.com/spf13/pflag"
"github.com/spf13/viper"
)
// bindMaxTokensFlag wires a fresh pflag-backed "max-tokens" key into viper so
// isExplicitlySet behaves the same way it does in production. Returns a
// cleanup function that removes the binding so sibling tests see a clean
// state.
func bindMaxTokensFlag(t *testing.T, args []string) func() {
t.Helper()
fs := pflag.NewFlagSet("test", pflag.ContinueOnError)
fs.Int("max-tokens", 8192, "")
if err := viper.BindPFlag("max-tokens", fs.Lookup("max-tokens")); err != nil {
t.Fatalf("BindPFlag: %v", err)
}
if err := fs.Parse(args); err != nil {
t.Fatalf("fs.Parse: %v", err)
}
return func() {
viper.Reset()
}
}
func TestRightSizeMaxTokens_RaisesWhenBelowCeiling(t *testing.T) {
cleanup := bindMaxTokensFlag(t, nil) // no args → flag.Changed = false
defer cleanup()
config := &ProviderConfig{MaxTokens: 8192}
modelInfo := &ModelInfo{
ID: "claude-sonnet-4-5",
Limit: Limit{Context: 200000, Output: 64000},
}
rightSizeMaxTokens(config, modelInfo)
if config.MaxTokens != 32768 {
t.Errorf("expected MaxTokens raised to defaultRightSizeCap (32768), got %d", config.MaxTokens)
}
}
func TestRightSizeMaxTokens_CapsAtDefaultRightSizeCap(t *testing.T) {
cleanup := bindMaxTokensFlag(t, nil)
defer cleanup()
config := &ProviderConfig{MaxTokens: 8192}
// Mistral Devstral has 262144 output — we should still cap at 32768.
modelInfo := &ModelInfo{
ID: "devstral-medium-latest",
Limit: Limit{Context: 262144, Output: 262144},
}
rightSizeMaxTokens(config, modelInfo)
if config.MaxTokens != defaultRightSizeCap {
t.Errorf("expected MaxTokens capped at %d, got %d", defaultRightSizeCap, config.MaxTokens)
}
}
func TestRightSizeMaxTokens_UsesExactOutputWhenBelowCap(t *testing.T) {
cleanup := bindMaxTokensFlag(t, nil)
defer cleanup()
config := &ProviderConfig{MaxTokens: 4096}
// Model with output limit smaller than the cap.
modelInfo := &ModelInfo{
ID: "gpt-4",
Limit: Limit{Context: 8192, Output: 8192},
}
rightSizeMaxTokens(config, modelInfo)
if config.MaxTokens != 8192 {
t.Errorf("expected MaxTokens raised to model output ceiling (8192), got %d", config.MaxTokens)
}
}
func TestRightSizeMaxTokens_DoesNotLowerCurrentValue(t *testing.T) {
cleanup := bindMaxTokensFlag(t, nil)
defer cleanup()
// User (via per-model settings, applied earlier) already bumped MaxTokens
// above the cap — we must not clobber their choice.
config := &ProviderConfig{MaxTokens: 100000}
modelInfo := &ModelInfo{
ID: "devstral-medium-latest",
Limit: Limit{Context: 262144, Output: 262144},
}
rightSizeMaxTokens(config, modelInfo)
if config.MaxTokens != 100000 {
t.Errorf("expected MaxTokens preserved at 100000, got %d", config.MaxTokens)
}
}
func TestRightSizeMaxTokens_RespectsExplicitFlag(t *testing.T) {
// Simulate `--max-tokens 4096` on the command line.
cleanup := bindMaxTokensFlag(t, []string{"--max-tokens", "4096"})
defer cleanup()
config := &ProviderConfig{MaxTokens: 4096}
modelInfo := &ModelInfo{
ID: "claude-sonnet-4-5",
Limit: Limit{Context: 200000, Output: 64000},
}
rightSizeMaxTokens(config, modelInfo)
if config.MaxTokens != 4096 {
t.Errorf("expected explicit --max-tokens to be preserved (4096), got %d", config.MaxTokens)
}
}
func TestRightSizeMaxTokens_NilModelInfo(t *testing.T) {
cleanup := bindMaxTokensFlag(t, nil)
defer cleanup()
config := &ProviderConfig{MaxTokens: 8192}
// Custom model / Ollama / unknown provider → no model info.
rightSizeMaxTokens(config, nil)
if config.MaxTokens != 8192 {
t.Errorf("expected MaxTokens unchanged with nil modelInfo, got %d", config.MaxTokens)
}
}
func TestRightSizeMaxTokens_ZeroOutputLimit(t *testing.T) {
cleanup := bindMaxTokensFlag(t, nil)
defer cleanup()
config := &ProviderConfig{MaxTokens: 8192}
// Model present in catalog but with no known output limit.
modelInfo := &ModelInfo{
ID: "unknown-model",
Limit: Limit{Context: 0, Output: 0},
}
rightSizeMaxTokens(config, modelInfo)
if config.MaxTokens != 8192 {
t.Errorf("expected MaxTokens unchanged with zero output limit, got %d", config.MaxTokens)
}
}