From e1c94cb362a3cc3395598512bcf7fa078f3657bf Mon Sep 17 00:00:00 2001
From: Ed Zynda <ezynda3@gmail.com>
Date: Fri, 17 Apr 2026 11:59:49 +0300
Subject: [PATCH] =?UTF-8?q?fix(sdk):=20align=20SDK=20max-tokens=20floor=20?=
 =?UTF-8?q?with=20CLI=20default=20(4096=20=E2=86=92=208192)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SDK last-resort MaxTokens floor is applied in kit.New() when
Options.MaxTokens, KIT_MAX_TOKENS, .kit.yml, and per-model defaults
are all unset. It was 4096 (inherited from the old setSDKDefaults
viper default) while the CLI --max-tokens cobra default is 8192.

Bump the floor to 8192 so SDK and CLI callers start from the same
base value before rightSizeMaxTokens runs, then update README,
skills/kit-sdk/SKILL.md, and www/pages/{configuration,sdk/options}.md
to match.
---
 README.md                  |  2 +-
 pkg/kit/config.go          |  6 ++++--
 pkg/kit/kit.go             | 17 +++++++++--------
 skills/kit-sdk/SKILL.md    |  4 ++--
 www/pages/configuration.md |  2 +-
 www/pages/sdk/options.md   |  7 ++++---
 6 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 35f6af68..4653d1ca 100644
--- a/README.md
+++ b/README.md
@@ -547,7 +547,7 @@ host, err := kit.New(ctx, &kit.Options{
     Quiet:        true,
 
     // Generation parameters (override env/config/per-model defaults)
-    MaxTokens:        16384,             // 0 = auto-resolve (env → config → per-model → 4096 floor)
+    MaxTokens:        16384,             // 0 = auto-resolve (env → config → per-model → 8192 floor)
     ThinkingLevel:    "medium",          // "off", "low", "medium", "high"
     Temperature:      ptr(float32(0.2)), // pointer so 0.0 != unset; nil = provider default
     TopP:             nil,                // nil = leave provider/per-model default
diff --git a/pkg/kit/config.go b/pkg/kit/config.go
index ae4a7dc6..7ff28bf8 100644
--- a/pkg/kit/config.go
+++ b/pkg/kit/config.go
@@ -40,10 +40,12 @@ Guidelines:
 
 // sdkDefaultMaxTokens is the last-resort ceiling applied when the SDK caller
 // has not configured max-tokens via Options, env, config, or a per-model
-// default. It is intentionally applied on the *models.ProviderConfig struct
+// default. It matches the CLI's --max-tokens cobra default so SDK and CLI
+// callers see the same base value before per-model right-sizing runs.
+// It is intentionally applied on the *models.ProviderConfig struct
 // (not via viper) so that viper.IsSet("max-tokens") remains false and the
 // right-sizing + per-model-default paths continue to work.
-const sdkDefaultMaxTokens = 4096
+const sdkDefaultMaxTokens = 8192
 
 // setSDKDefaults registers viper defaults that match the CLI's cobra flag
 // defaults for keys where SetDefault does not interfere with downstream
diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go
index fcfcac61..632bde53 100644
--- a/pkg/kit/kit.go
+++ b/pkg/kit/kit.go
@@ -825,20 +825,21 @@ type Options struct {
 	// .kit.yml / KIT_* environment variables. Leaving a field at its
 	// zero/nil value means "use the configured default", which in turn
 	// falls back to per-model defaults (modelSettings / customModels) and
-	// finally to a last-resort SDK floor of 4096 for MaxTokens (sampling
-	// params fall through to provider-level defaults).
+	// finally to a last-resort SDK floor of 8192 for MaxTokens, matching
+	// the CLI --max-tokens default. Sampling params instead fall through
+	// to provider-level defaults.
 	//
 	// Pointer types are used for sampling parameters so the SDK can
 	// distinguish "explicitly set to 0" from "leave alone".
 
 	// MaxTokens overrides the maximum output tokens per LLM response.
 	// 0 = let the precedence chain resolve a value (env → config →
-	// per-model → 4096 SDK floor). Setting a non-zero value here
-	// suppresses automatic right-sizing, matching the CLI's
-	// --max-tokens flag semantics. Bump this when generating long
-	// outputs (HTML artifacts, large refactors, etc.) to avoid silent
-	// truncation mid-tool-call. The cap also applies after model
-	// switches via [Kit.SetModel].
+	// per-model → 8192 SDK floor, matching the CLI default). Setting a
+	// non-zero value here suppresses automatic right-sizing, matching
+	// the CLI's --max-tokens flag semantics. Bump this when generating
+	// long outputs (HTML artifacts, large refactors, etc.) to avoid
+	// silent truncation mid-tool-call. The cap also applies after
+	// model switches via [Kit.SetModel].
 	MaxTokens int
 
 	// ThinkingLevel sets the reasoning effort for models that support
diff --git a/skills/kit-sdk/SKILL.md b/skills/kit-sdk/SKILL.md
index a708054e..0dc1a02e 100644
--- a/skills/kit-sdk/SKILL.md
+++ b/skills/kit-sdk/SKILL.md
@@ -83,7 +83,7 @@ host, err := kit.New(ctx, &kit.Options{
     // Generation parameters — override env/config/per-model defaults.
     // Leaving a field at its zero/nil value lets the precedence chain
     // resolve a value (KIT_* env → .kit.yml → modelSettings/customModels →
-    // 4096 floor for MaxTokens, provider defaults for samplers).
+    // 8192 floor for MaxTokens, provider defaults for samplers).
     MaxTokens:        16384,             // 0 = auto-resolve; non-zero suppresses right-sizing
     ThinkingLevel:    "medium",          // "off", "low", "medium", "high" ("" = default)
     Temperature:      ptrFloat32(0.2),   // pointer so explicit 0.0 != unset
@@ -148,7 +148,7 @@ func ptrFloat32(v float32) *float32 { return &v }
 
 | Field | Type | Empty/nil means | Notes |
 |-------|------|-----------------|-------|
-| `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 4096 floor) | Non-zero suppresses `rightSizeMaxTokens` |
+| `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 8192 floor) | Non-zero suppresses `rightSizeMaxTokens` |
 | `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"low"`, `"medium"`, `"high"` (and `"minimal"` for some providers) |
 | `Temperature` | `*float32` | Leave provider/per-model default | Pointer so explicit `0.0` ≠ unset |
 | `TopP` | `*float32` | Leave provider/per-model default | |
diff --git a/www/pages/configuration.md b/www/pages/configuration.md
index c8735bac..2176f07d 100644
--- a/www/pages/configuration.md
+++ b/www/pages/configuration.md
@@ -189,7 +189,7 @@ For the generation and provider parameters documented above, the resolved value
 4. `.kit.yml` / `.kit.yaml` / `.kit.json` (project-local, then global)
 5. Per-model defaults (`modelSettings[provider/model]` / `customModels[...].params`)
 6. Provider-level defaults (e.g. Anthropic's own temperature default)
-7. SDK last-resort floor — currently a 4096 output-token ceiling when nothing else is configured
+7. SDK last-resort floor — currently an 8192 output-token cap (matching the CLI `--max-tokens` default), auto-raised per model up to 32768 when the model's catalog ceiling is higher
 
 See the [SDK options reference](/sdk/options) for the full list of `kit.Options` fields that map to these keys.
 
diff --git a/www/pages/sdk/options.md b/www/pages/sdk/options.md
index 0c69cb03..29a55911 100644
--- a/www/pages/sdk/options.md
+++ b/www/pages/sdk/options.md
@@ -96,8 +96,9 @@ host, err := kit.New(ctx, &kit.Options{
 These fields override the corresponding values from `.kit.yml` / `KIT_*`
 environment variables. Leaving a field at its zero/nil value lets the
 precedence chain resolve a value (`KIT_*` env → config file → per-model
-defaults from `modelSettings`/`customModels` → a 4096 SDK floor for
-`MaxTokens` and provider-level defaults for samplers).
+defaults from `modelSettings`/`customModels` → an 8192 SDK floor for
+`MaxTokens` (matching the CLI `--max-tokens` default) and provider-level
+defaults for samplers).
 
 | Field | Type | Default | Description |
 |-------|------|---------|-------------|
@@ -174,7 +175,7 @@ in this order (highest priority first):
 3. `.kit.yml` (project-local then `~/.kit.yml`)
 4. Per-model defaults (`modelSettings[provider/model]` or `customModels[...].params`)
 5. Provider-level defaults (e.g. Anthropic's own temperature default)
-6. SDK last-resort floor (currently: `MaxTokens = 4096`)
+6. SDK last-resort floor (currently: `MaxTokens = 8192`, matching the CLI `--max-tokens` default)
 
 Sampling params that remain `nil` after the SDK resolution step are left out
 of the provider call entirely, so the LLM library applies its own default.