From e1c94cb362a3cc3395598512bcf7fa078f3657bf Mon Sep 17 00:00:00 2001 From: Ed Zynda Date: Fri, 17 Apr 2026 11:59:49 +0300 Subject: [PATCH] =?UTF-8?q?fix(sdk):=20align=20SDK=20max-tokens=20floor=20?= =?UTF-8?q?with=20CLI=20default=20(4096=20=E2=86=92=208192)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SDK last-resort MaxTokens floor is applied in kit.New() when Options.MaxTokens, KIT_MAX_TOKENS, .kit.yml, and per-model defaults are all unset. It was 4096 (inherited from the old setSDKDefaults viper default) while the CLI --max-tokens cobra default is 8192. Bump the floor to 8192 so SDK and CLI callers start from the same base value before rightSizeMaxTokens runs, then update README, skills/kit-sdk/SKILL.md, and www/pages/{configuration,sdk/options}.md to match. --- README.md | 2 +- pkg/kit/config.go | 6 ++++-- pkg/kit/kit.go | 17 +++++++++-------- skills/kit-sdk/SKILL.md | 4 ++-- www/pages/configuration.md | 2 +- www/pages/sdk/options.md | 7 ++++--- 6 files changed, 21 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 35f6af68..4653d1ca 100644 --- a/README.md +++ b/README.md @@ -547,7 +547,7 @@ host, err := kit.New(ctx, &kit.Options{ Quiet: true, // Generation parameters (override env/config/per-model defaults) - MaxTokens: 16384, // 0 = auto-resolve (env → config → per-model → 4096 floor) + MaxTokens: 16384, // 0 = auto-resolve (env → config → per-model → 8192 floor) ThinkingLevel: "medium", // "off", "low", "medium", "high" Temperature: ptr(float32(0.2)), // pointer so 0.0 != unset; nil = provider default TopP: nil, // nil = leave provider/per-model default diff --git a/pkg/kit/config.go b/pkg/kit/config.go index ae4a7dc6..7ff28bf8 100644 --- a/pkg/kit/config.go +++ b/pkg/kit/config.go @@ -40,10 +40,12 @@ Guidelines: // sdkDefaultMaxTokens is the last-resort ceiling applied when the SDK caller // has not configured max-tokens via Options, env, config, or a per-model -// default. It is intentionally applied on the *models.ProviderConfig struct +// default. It matches the CLI's --max-tokens cobra default so SDK and CLI +// callers see the same base value before per-model right-sizing runs. +// It is intentionally applied on the *models.ProviderConfig struct // (not via viper) so that viper.IsSet("max-tokens") remains false and the // right-sizing + per-model-default paths continue to work. -const sdkDefaultMaxTokens = 4096 +const sdkDefaultMaxTokens = 8192 // setSDKDefaults registers viper defaults that match the CLI's cobra flag // defaults for keys where SetDefault does not interfere with downstream diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go index fcfcac61..632bde53 100644 --- a/pkg/kit/kit.go +++ b/pkg/kit/kit.go @@ -825,20 +825,21 @@ type Options struct { // .kit.yml / KIT_* environment variables. Leaving a field at its // zero/nil value means "use the configured default", which in turn // falls back to per-model defaults (modelSettings / customModels) and - // finally to a last-resort SDK floor of 4096 for MaxTokens (sampling - // params fall through to provider-level defaults). + // finally to a last-resort SDK floor of 8192 for MaxTokens (matching + // the CLI --max-tokens default; sampling params fall through to + // provider-level defaults). // // Pointer types are used for sampling parameters so the SDK can // distinguish "explicitly set to 0" from "leave alone". // MaxTokens overrides the maximum output tokens per LLM response. // 0 = let the precedence chain resolve a value (env → config → - // per-model → 4096 SDK floor). Setting a non-zero value here - // suppresses automatic right-sizing, matching the CLI's - // --max-tokens flag semantics. Bump this when generating long - // outputs (HTML artifacts, large refactors, etc.) to avoid silent - // truncation mid-tool-call. The cap also applies after model - // switches via [Kit.SetModel]. + // per-model → 8192 SDK floor, matching the CLI default). Setting a + // non-zero value here suppresses automatic right-sizing, matching + // the CLI's --max-tokens flag semantics. Bump this when generating + // long outputs (HTML artifacts, large refactors, etc.) to avoid + // silent truncation mid-tool-call. The cap also applies after + // model switches via [Kit.SetModel]. MaxTokens int // ThinkingLevel sets the reasoning effort for models that support diff --git a/skills/kit-sdk/SKILL.md b/skills/kit-sdk/SKILL.md index a708054e..0dc1a02e 100644 --- a/skills/kit-sdk/SKILL.md +++ b/skills/kit-sdk/SKILL.md @@ -83,7 +83,7 @@ host, err := kit.New(ctx, &kit.Options{ // Generation parameters — override env/config/per-model defaults. // Leaving a field at its zero/nil value lets the precedence chain // resolve a value (KIT_* env → .kit.yml → modelSettings/customModels → - // 4096 floor for MaxTokens, provider defaults for samplers). + // 8192 floor for MaxTokens, provider defaults for samplers). MaxTokens: 16384, // 0 = auto-resolve; non-zero suppresses right-sizing ThinkingLevel: "medium", // "off", "low", "medium", "high" ("" = default) Temperature: ptrFloat32(0.2), // pointer so explicit 0.0 != unset @@ -148,7 +148,7 @@ func ptrFloat32(v float32) *float32 { return &v } | Field | Type | Empty/nil means | Notes | |-------|------|-----------------|-------| -| `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 4096 floor) | Non-zero suppresses `rightSizeMaxTokens` | +| `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 8192 floor) | Non-zero suppresses `rightSizeMaxTokens` | | `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"low"`, `"medium"`, `"high"` (and `"minimal"` for some providers) | | `Temperature` | `*float32` | Leave provider/per-model default | Pointer so explicit `0.0` ≠ unset | | `TopP` | `*float32` | Leave provider/per-model default | | diff --git a/www/pages/configuration.md b/www/pages/configuration.md index c8735bac..2176f07d 100644 --- a/www/pages/configuration.md +++ b/www/pages/configuration.md @@ -189,7 +189,7 @@ For the generation and provider parameters documented above, the resolved value 4. `.kit.yml` / `.kit.yaml` / `.kit.json` (project-local, then global) 5. Per-model defaults (`modelSettings[provider/model]` / `customModels[...].params`) 6. Provider-level defaults (e.g. Anthropic's own temperature default) -7. SDK last-resort floor — currently a 4096 output-token ceiling when nothing else is configured +7. SDK last-resort floor — currently an 8192 output-token ceiling matching the CLI `--max-tokens` default, auto-raised per-model up to 32768 when the model's catalog ceiling is higher See the [SDK options reference](/sdk/options) for the full list of `kit.Options` fields that map to these keys. diff --git a/www/pages/sdk/options.md b/www/pages/sdk/options.md index 0c69cb03..29a55911 100644 --- a/www/pages/sdk/options.md +++ b/www/pages/sdk/options.md @@ -96,8 +96,9 @@ host, err := kit.New(ctx, &kit.Options{ These fields override the corresponding values from `.kit.yml` / `KIT_*` environment variables. Leaving a field at its zero/nil value lets the precedence chain resolve a value (`KIT_*` env → config file → per-model -defaults from `modelSettings`/`customModels` → a 4096 SDK floor for -`MaxTokens` and provider-level defaults for samplers). +defaults from `modelSettings`/`customModels` → an 8192 SDK floor for +`MaxTokens` (matching the CLI `--max-tokens` default) and provider-level +defaults for samplers). | Field | Type | Default | Description | |-------|------|---------|-------------| @@ -174,7 +175,7 @@ in this order (highest priority first): 3. `.kit.yml` (project-local then `~/.kit.yml`) 4. Per-model defaults (`modelSettings[provider/model]` or `customModels[...].params`) 5. Provider-level defaults (e.g. Anthropic's own temperature default) -6. SDK last-resort floor (currently: `MaxTokens = 4096`) +6. SDK last-resort floor (currently: `MaxTokens = 8192`, matching the CLI `--max-tokens` default) Sampling params that remain `nil` after the SDK resolution step are left out of the provider call entirely, so the LLM library applies its own default.