mirror of
https://github.com/mark3labs/kit.git
synced 2026-06-13 19:20:06 +00:00
3bb20f5283
- Raise --max-tokens default from 4096 to 8192. - Auto-raise MaxTokens toward the model's catalog Limit.Output (capped at 32768) when the user hasn't set --max-tokens explicitly and no per-model modelSettings override was applied. Prevents silent 4k/8k truncation on models that support 32k-262k output. - Surface FinishReasonLength at turn end: the app now subscribes to TurnEndEvent and renders a system-message banner explaining the current cap, the model's known ceiling, and how to raise it. Previously the TUI swallowed 'length' stops, producing 'ghost' truncations. - Export FinishReason* constants on pkg/kit (Stop, Length, ToolCalls, ContentFilter, Error, Other, Unknown) and fix stale comments that used Anthropic-style strings. - Add Kit.MaxTokens() and Kit.MaxOutputLimit() SDK accessors, backed by Agent.GetMaxTokens() which correctly returns 0 for providers that suppress the param (e.g. Codex OAuth). - Tests: rightSizeMaxTokens covers 7 paths (raise, cap, exact output below cap, preserve, explicit flag, nil info, zero limit); handleTurnEnd covers length/non-length/nil-sendFn and the fallback message formatter. - Docs: update configuration.md, cli/flags.md, and kit-extensions skill to reflect the new default and behavior.
149 lines
4.1 KiB
Go
149 lines
4.1 KiB
Go
package models
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/spf13/pflag"
|
|
"github.com/spf13/viper"
|
|
)
|
|
|
|
// bindMaxTokensFlag wires a fresh pflag-backed "max-tokens" key into viper so
|
|
// isExplicitlySet behaves the same way it does in production. Returns a
|
|
// cleanup function that removes the binding so sibling tests see a clean
|
|
// state.
|
|
func bindMaxTokensFlag(t *testing.T, args []string) func() {
|
|
t.Helper()
|
|
fs := pflag.NewFlagSet("test", pflag.ContinueOnError)
|
|
fs.Int("max-tokens", 8192, "")
|
|
if err := viper.BindPFlag("max-tokens", fs.Lookup("max-tokens")); err != nil {
|
|
t.Fatalf("BindPFlag: %v", err)
|
|
}
|
|
if err := fs.Parse(args); err != nil {
|
|
t.Fatalf("fs.Parse: %v", err)
|
|
}
|
|
return func() {
|
|
viper.Reset()
|
|
}
|
|
}
|
|
|
|
func TestRightSizeMaxTokens_RaisesWhenBelowCeiling(t *testing.T) {
|
|
cleanup := bindMaxTokensFlag(t, nil) // no args → flag.Changed = false
|
|
defer cleanup()
|
|
|
|
config := &ProviderConfig{MaxTokens: 8192}
|
|
modelInfo := &ModelInfo{
|
|
ID: "claude-sonnet-4-5",
|
|
Limit: Limit{Context: 200000, Output: 64000},
|
|
}
|
|
|
|
rightSizeMaxTokens(config, modelInfo)
|
|
|
|
if config.MaxTokens != 32768 {
|
|
t.Errorf("expected MaxTokens raised to defaultRightSizeCap (32768), got %d", config.MaxTokens)
|
|
}
|
|
}
|
|
|
|
func TestRightSizeMaxTokens_CapsAtDefaultRightSizeCap(t *testing.T) {
|
|
cleanup := bindMaxTokensFlag(t, nil)
|
|
defer cleanup()
|
|
|
|
config := &ProviderConfig{MaxTokens: 8192}
|
|
// Mistral Devstral has 262144 output — we should still cap at 32768.
|
|
modelInfo := &ModelInfo{
|
|
ID: "devstral-medium-latest",
|
|
Limit: Limit{Context: 262144, Output: 262144},
|
|
}
|
|
|
|
rightSizeMaxTokens(config, modelInfo)
|
|
|
|
if config.MaxTokens != defaultRightSizeCap {
|
|
t.Errorf("expected MaxTokens capped at %d, got %d", defaultRightSizeCap, config.MaxTokens)
|
|
}
|
|
}
|
|
|
|
func TestRightSizeMaxTokens_UsesExactOutputWhenBelowCap(t *testing.T) {
|
|
cleanup := bindMaxTokensFlag(t, nil)
|
|
defer cleanup()
|
|
|
|
config := &ProviderConfig{MaxTokens: 4096}
|
|
// Model with output limit smaller than the cap.
|
|
modelInfo := &ModelInfo{
|
|
ID: "gpt-4",
|
|
Limit: Limit{Context: 8192, Output: 8192},
|
|
}
|
|
|
|
rightSizeMaxTokens(config, modelInfo)
|
|
|
|
if config.MaxTokens != 8192 {
|
|
t.Errorf("expected MaxTokens raised to model output ceiling (8192), got %d", config.MaxTokens)
|
|
}
|
|
}
|
|
|
|
func TestRightSizeMaxTokens_DoesNotLowerCurrentValue(t *testing.T) {
|
|
cleanup := bindMaxTokensFlag(t, nil)
|
|
defer cleanup()
|
|
|
|
// User (via per-model settings, applied earlier) already bumped MaxTokens
|
|
// above the cap — we must not clobber their choice.
|
|
config := &ProviderConfig{MaxTokens: 100000}
|
|
modelInfo := &ModelInfo{
|
|
ID: "devstral-medium-latest",
|
|
Limit: Limit{Context: 262144, Output: 262144},
|
|
}
|
|
|
|
rightSizeMaxTokens(config, modelInfo)
|
|
|
|
if config.MaxTokens != 100000 {
|
|
t.Errorf("expected MaxTokens preserved at 100000, got %d", config.MaxTokens)
|
|
}
|
|
}
|
|
|
|
func TestRightSizeMaxTokens_RespectsExplicitFlag(t *testing.T) {
|
|
// Simulate `--max-tokens 4096` on the command line.
|
|
cleanup := bindMaxTokensFlag(t, []string{"--max-tokens", "4096"})
|
|
defer cleanup()
|
|
|
|
config := &ProviderConfig{MaxTokens: 4096}
|
|
modelInfo := &ModelInfo{
|
|
ID: "claude-sonnet-4-5",
|
|
Limit: Limit{Context: 200000, Output: 64000},
|
|
}
|
|
|
|
rightSizeMaxTokens(config, modelInfo)
|
|
|
|
if config.MaxTokens != 4096 {
|
|
t.Errorf("expected explicit --max-tokens to be preserved (4096), got %d", config.MaxTokens)
|
|
}
|
|
}
|
|
|
|
func TestRightSizeMaxTokens_NilModelInfo(t *testing.T) {
|
|
cleanup := bindMaxTokensFlag(t, nil)
|
|
defer cleanup()
|
|
|
|
config := &ProviderConfig{MaxTokens: 8192}
|
|
// Custom model / Ollama / unknown provider → no model info.
|
|
rightSizeMaxTokens(config, nil)
|
|
|
|
if config.MaxTokens != 8192 {
|
|
t.Errorf("expected MaxTokens unchanged with nil modelInfo, got %d", config.MaxTokens)
|
|
}
|
|
}
|
|
|
|
func TestRightSizeMaxTokens_ZeroOutputLimit(t *testing.T) {
|
|
cleanup := bindMaxTokensFlag(t, nil)
|
|
defer cleanup()
|
|
|
|
config := &ProviderConfig{MaxTokens: 8192}
|
|
// Model present in catalog but with no known output limit.
|
|
modelInfo := &ModelInfo{
|
|
ID: "unknown-model",
|
|
Limit: Limit{Context: 0, Output: 0},
|
|
}
|
|
|
|
rightSizeMaxTokens(config, modelInfo)
|
|
|
|
if config.MaxTokens != 8192 {
|
|
t.Errorf("expected MaxTokens unchanged with zero output limit, got %d", config.MaxTokens)
|
|
}
|
|
}
|