Files
kit/internal/models/gemini/gemini.go
T
Shane McDonald c4aa911e3a feat: add Google Vertex AI support for Claude models (#146)
Add support for using Claude models via Google Cloud Vertex AI through
the `google-vertex-anthropic` provider. This enables users who have
Claude access through their Google Cloud account to use mcphost with
Vertex AI authentication.

Changes:
- Add `google-vertex-anthropic` provider case and createVertexAnthropicProvider()
- Support multiple env var names for project/region to match eino-claude:
  - Project: ANTHROPIC_VERTEX_PROJECT_ID, GOOGLE_CLOUD_PROJECT, GCLOUD_PROJECT
  - Region: CLOUD_ML_REGION (defaults to "global" if not set)
- Upgrade eino from v0.5.11 to v0.7.11 (required by eino-claude v0.1.12)
- Migrate schema API from OpenAPI v3 to JSON Schema (eino v0.7.11 change)

Usage:
  # Authenticate with Google Cloud
  gcloud auth application-default login

  # Set required environment variables
  export ANTHROPIC_VERTEX_PROJECT_ID="your-project-id"
  export CLOUD_ML_REGION="us-east5"  # or use default "global"

  # Run mcphost
  mcphost --model google-vertex-anthropic:claude-sonnet-4@20250514

Reference: https://docs.anthropic.com/en/docs/claude-code/google-vertex-ai

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 13:01:18 +03:00

792 lines
22 KiB
Go

package gemini
import (
"context"
"encoding/json"
"errors"
"fmt"
"runtime/debug"
"github.com/cloudwego/eino/callbacks"
"github.com/cloudwego/eino/components"
"github.com/cloudwego/eino/components/model"
"github.com/cloudwego/eino/schema"
"github.com/eino-contrib/jsonschema"
"github.com/getkin/kin-openapi/openapi3"
"google.golang.org/genai"
)
var _ model.ToolCallingChatModel = (*ChatModel)(nil)
// NewChatModel creates a new Gemini chat model instance.
// It initializes a Google Gemini model with the specified configuration,
// supporting both text generation and tool calling capabilities.
//
// Parameters:
// - ctx: The context for the operation (currently unused but kept for interface consistency)
// - cfg: Configuration for the Gemini model including client, model name, and parameters
//
// Returns:
// - *ChatModel: A Gemini chat model instance implementing ToolCallingChatModel
// - error: Any error that occurred during creation
//
// Example:
//
// client, _ := genai.NewClient(ctx, &genai.ClientConfig{
// APIKey: "your-api-key",
// })
// model, err := gemini.NewChatModel(ctx, &gemini.Config{
// Client: client,
// Model: "gemini-pro",
// MaxTokens: &maxTokens,
// })
func NewChatModel(_ context.Context, cfg *Config) (*ChatModel, error) {
return &ChatModel{
cli: cfg.Client,
model: cfg.Model,
maxTokens: cfg.MaxTokens,
temperature: cfg.Temperature,
topP: cfg.TopP,
topK: cfg.TopK,
responseSchema: cfg.ResponseSchema,
enableCodeExecution: cfg.EnableCodeExecution,
safetySettings: cfg.SafetySettings,
}, nil
}
// Config contains the configuration options for the Gemini model
type Config struct {
// Client is the Gemini API client instance
// Required for making API calls to Gemini
Client *genai.Client
// Model specifies which Gemini model to use
// Examples: "gemini-pro", "gemini-pro-vision", "gemini-1.5-flash"
Model string
// MaxTokens limits the maximum number of tokens in the response
// Optional. Example: maxTokens := 100
MaxTokens *int
// Temperature controls randomness in responses
// Range: [0.0, 1.0], where 0.0 is more focused and 1.0 is more creative
// Optional. Example: temperature := float32(0.7)
Temperature *float32
// TopP controls diversity via nucleus sampling
// Range: [0.0, 1.0], where 1.0 disables nucleus sampling
// Optional. Example: topP := float32(0.95)
TopP *float32
// TopK controls diversity by limiting the top K tokens to sample from
// Optional. Example: topK := int32(40)
TopK *int32
// ResponseSchema defines the structure for JSON responses
// Optional. Used when you want structured output in JSON format
ResponseSchema *jsonschema.Schema
// EnableCodeExecution allows the model to execute code
// Warning: Be cautious with code execution in production
// Optional. Default: false
EnableCodeExecution bool
// SafetySettings configures content filtering for different harm categories
// Controls the model's filtering behavior for potentially harmful content
// Optional.
SafetySettings []*genai.SafetySetting
}
// options contains Gemini-specific options for model configuration.
// These are options that are specific to the Gemini API and not part
// of the common model options interface.
type options struct {
// TopK limits the number of tokens to sample from
TopK *int32
// ResponseSchema defines the expected JSON structure for responses
ResponseSchema *jsonschema.Schema
}
// ChatModel implements the Gemini chat model for the eino framework.
// It provides integration with Google's Gemini API, supporting both
// text generation and tool calling capabilities.
type ChatModel struct {
// cli is the Gemini API client instance
cli *genai.Client
// model specifies which Gemini model to use
model string
// maxTokens limits the response length
maxTokens *int
// topP controls nucleus sampling
topP *float32
// temperature controls randomness
temperature *float32
// topK limits token sampling
topK *int32
// responseSchema for structured JSON output
responseSchema *jsonschema.Schema
// tools converted to Gemini format
tools []*genai.Tool
// origTools stores the original tool definitions
origTools []*schema.ToolInfo
// toolChoice controls how tools are used
toolChoice *schema.ToolChoice
// enableCodeExecution allows code execution (use with caution)
enableCodeExecution bool
// safetySettings for content filtering
safetySettings []*genai.SafetySetting
}
// Generate generates a single response from the Gemini model.
// It processes the input messages and returns a complete response.
//
// Parameters:
// - ctx: Context for the operation, supporting cancellation and callbacks
// - input: The conversation history as a slice of messages
// - opts: Optional configuration options for the generation
//
// Returns:
// - *schema.Message: The generated response message with content and metadata
// - error: Any error that occurred during generation
func (cm *ChatModel) Generate(ctx context.Context, input []*schema.Message, opts ...model.Option) (message *schema.Message, err error) {
ctx = callbacks.EnsureRunInfo(ctx, cm.GetType(), components.ComponentOfChatModel)
config, conf, err := cm.buildGenerateConfig(opts...)
if err != nil {
return nil, err
}
ctx = callbacks.OnStart(ctx, &model.CallbackInput{
Messages: input,
Tools: model.GetCommonOptions(&model.Options{Tools: cm.origTools}, opts...).Tools,
Config: conf,
})
defer func() {
if err != nil {
callbacks.OnError(ctx, err)
}
}()
if len(input) == 0 {
return nil, fmt.Errorf("gemini input is empty")
}
contents, err := cm.convertSchemaMessages(input)
if err != nil {
return nil, err
}
result, err := cm.cli.Models.GenerateContent(ctx, cm.model, contents, config)
if err != nil {
return nil, fmt.Errorf("generate content failed: %w", err)
}
message, err = cm.convertResponse(result)
if err != nil {
return nil, fmt.Errorf("convert response failed: %w", err)
}
callbacks.OnEnd(ctx, cm.convertCallbackOutput(message, conf))
return message, nil
}
// Stream generates a streaming response from the Gemini model.
// It allows incremental processing of the model's output as it's generated.
//
// Parameters:
// - ctx: Context for the operation, supporting cancellation and callbacks
// - input: The conversation history as a slice of messages
// - opts: Optional configuration options for the generation
//
// Returns:
// - *schema.StreamReader[*schema.Message]: A reader for the streaming response
// - error: Any error that occurred during stream setup
func (cm *ChatModel) Stream(ctx context.Context, input []*schema.Message, opts ...model.Option) (result *schema.StreamReader[*schema.Message], err error) {
ctx = callbacks.EnsureRunInfo(ctx, cm.GetType(), components.ComponentOfChatModel)
config, conf, err := cm.buildGenerateConfig(opts...)
if err != nil {
return nil, err
}
ctx = callbacks.OnStart(ctx, &model.CallbackInput{
Messages: input,
Tools: model.GetCommonOptions(&model.Options{Tools: cm.origTools}, opts...).Tools,
Config: conf,
})
defer func() {
if err != nil {
callbacks.OnError(ctx, err)
}
}()
if len(input) == 0 {
return nil, fmt.Errorf("gemini input is empty")
}
contents, err := cm.convertSchemaMessages(input)
if err != nil {
return nil, err
}
sr, sw := schema.Pipe[*model.CallbackOutput](1)
go func() {
defer func() {
panicErr := recover()
if panicErr != nil {
_ = sw.Send(nil, newPanicErr(panicErr, debug.Stack()))
}
sw.Close()
}()
for resp, err := range cm.cli.Models.GenerateContentStream(ctx, cm.model, contents, config) {
if err != nil {
sw.Send(nil, err)
return
}
message, err := cm.convertResponse(resp)
if err != nil {
sw.Send(nil, err)
return
}
closed := sw.Send(cm.convertCallbackOutput(message, conf), nil)
if closed {
return
}
}
}()
srList := sr.Copy(2)
callbacks.OnEndWithStreamOutput(ctx, srList[0])
return schema.StreamReaderWithConvert(srList[1], func(t *model.CallbackOutput) (*schema.Message, error) {
return t.Message, nil
}), nil
}
// WithTools creates a new model instance with the specified tools available.
// It returns a new ChatModel with tools configured for function calling.
// The original model instance remains unchanged.
//
// Parameters:
// - tools: A slice of tool definitions that the model can use
//
// Returns:
// - model.ToolCallingChatModel: A new model instance with tools enabled
// - error: Returns an error if no tools provided or conversion fails
func (cm *ChatModel) WithTools(tools []*schema.ToolInfo) (model.ToolCallingChatModel, error) {
if len(tools) == 0 {
return nil, errors.New("no tools to bind")
}
gTools, err := cm.convertToGeminiTools(tools)
if err != nil {
return nil, fmt.Errorf("convert to gemini tools failed: %w", err)
}
tc := schema.ToolChoiceAllowed
ncm := *cm
ncm.toolChoice = &tc
ncm.tools = gTools
ncm.origTools = tools
return &ncm, nil
}
// BindTools binds tools to the current model instance.
// Unlike WithTools, this modifies the current instance rather than
// creating a new one. Tools are set to "allowed" mode by default.
//
// Parameters:
// - tools: A slice of tool definitions to bind to the model
//
// Returns:
// - error: Returns an error if no tools provided or conversion fails
func (cm *ChatModel) BindTools(tools []*schema.ToolInfo) error {
if len(tools) == 0 {
return errors.New("no tools to bind")
}
gTools, err := cm.convertToGeminiTools(tools)
if err != nil {
return err
}
cm.tools = gTools
cm.origTools = tools
tc := schema.ToolChoiceAllowed
cm.toolChoice = &tc
return nil
}
// BindForcedTools binds tools to the current model instance in forced mode.
// This ensures the model will always use one of the provided tools
// rather than generating a text response.
//
// Parameters:
// - tools: A slice of tool definitions to bind to the model
//
// Returns:
// - error: Returns an error if no tools provided or conversion fails
func (cm *ChatModel) BindForcedTools(tools []*schema.ToolInfo) error {
if len(tools) == 0 {
return errors.New("no tools to bind")
}
gTools, err := cm.convertToGeminiTools(tools)
if err != nil {
return err
}
cm.tools = gTools
cm.origTools = tools
tc := schema.ToolChoiceForced
cm.toolChoice = &tc
return nil
}
func (cm *ChatModel) buildGenerateConfig(opts ...model.Option) (*genai.GenerateContentConfig, *model.Config, error) {
commonOptions := model.GetCommonOptions(&model.Options{
Temperature: cm.temperature,
MaxTokens: cm.maxTokens,
TopP: cm.topP,
Tools: nil,
ToolChoice: cm.toolChoice,
}, opts...)
geminiOptions := model.GetImplSpecificOptions(&options{
TopK: cm.topK,
ResponseSchema: cm.responseSchema,
}, opts...)
conf := &model.Config{}
config := &genai.GenerateContentConfig{}
// Set model
if commonOptions.Model != nil {
conf.Model = *commonOptions.Model
} else {
conf.Model = cm.model
}
// Set temperature
if commonOptions.Temperature != nil {
conf.Temperature = *commonOptions.Temperature
config.Temperature = commonOptions.Temperature
} else if cm.temperature != nil {
conf.Temperature = *cm.temperature
config.Temperature = cm.temperature
}
// Set max tokens
if commonOptions.MaxTokens != nil {
conf.MaxTokens = *commonOptions.MaxTokens
config.MaxOutputTokens = int32(*commonOptions.MaxTokens)
} else if cm.maxTokens != nil {
conf.MaxTokens = *cm.maxTokens
config.MaxOutputTokens = int32(*cm.maxTokens)
}
// Set top P
if commonOptions.TopP != nil {
conf.TopP = *commonOptions.TopP
config.TopP = commonOptions.TopP
} else if cm.topP != nil {
conf.TopP = *cm.topP
config.TopP = cm.topP
}
// Set top K
if geminiOptions.TopK != nil {
config.TopK = genai.Ptr(float32(*geminiOptions.TopK))
} else if cm.topK != nil {
config.TopK = genai.Ptr(float32(*cm.topK))
}
// Set tools
tools := cm.tools
if commonOptions.Tools != nil {
var err error
tools, err = cm.convertToGeminiTools(commonOptions.Tools)
if err != nil {
return nil, nil, err
}
}
if len(tools) > 0 {
config.Tools = tools
}
// Set tool choice
if commonOptions.ToolChoice != nil {
switch *commonOptions.ToolChoice {
case schema.ToolChoiceForbidden:
config.ToolConfig = &genai.ToolConfig{
FunctionCallingConfig: &genai.FunctionCallingConfig{
Mode: genai.FunctionCallingConfigModeNone,
},
}
case schema.ToolChoiceAllowed:
config.ToolConfig = &genai.ToolConfig{
FunctionCallingConfig: &genai.FunctionCallingConfig{
Mode: genai.FunctionCallingConfigModeAuto,
},
}
case schema.ToolChoiceForced:
if len(tools) == 0 {
return nil, nil, fmt.Errorf("tool choice is forced but no tools provided")
}
config.ToolConfig = &genai.ToolConfig{
FunctionCallingConfig: &genai.FunctionCallingConfig{
Mode: genai.FunctionCallingConfigModeAny,
},
}
default:
return nil, nil, fmt.Errorf("tool choice=%s not supported", *commonOptions.ToolChoice)
}
}
// Set safety settings
if len(cm.safetySettings) > 0 {
config.SafetySettings = cm.safetySettings
}
// Set response schema for JSON mode
if geminiOptions.ResponseSchema != nil {
gSchema, err := cm.convertJSONSchema(geminiOptions.ResponseSchema)
if err != nil {
return nil, nil, fmt.Errorf("convert response schema failed: %w", err)
}
config.ResponseMIMEType = "application/json"
config.ResponseSchema = gSchema
}
return config, conf, nil
}
func (cm *ChatModel) convertToGeminiTools(tools []*schema.ToolInfo) ([]*genai.Tool, error) {
if len(tools) == 0 {
return nil, nil
}
var functionDeclarations []*genai.FunctionDeclaration
for _, tool := range tools {
openSchema, err := tool.ToJSONSchema()
if err != nil {
return nil, fmt.Errorf("get open schema failed: %w", err)
}
gSchema, err := cm.convertJSONSchema(openSchema)
if err != nil {
return nil, fmt.Errorf("convert open schema failed: %w", err)
}
funcDecl := &genai.FunctionDeclaration{
Name: tool.Name,
Description: tool.Desc,
Parameters: gSchema,
}
functionDeclarations = append(functionDeclarations, funcDecl)
}
return []*genai.Tool{{FunctionDeclarations: functionDeclarations}}, nil
}
func (cm *ChatModel) convertJSONSchema(schema *jsonschema.Schema) (*genai.Schema, error) {
if schema == nil {
return nil, nil
}
result := &genai.Schema{
Description: schema.Description,
}
switch schema.Type {
case openapi3.TypeObject:
result.Type = genai.TypeObject
if schema.Properties != nil {
properties := make(map[string]*genai.Schema)
for pair := schema.Properties.Oldest(); pair != nil; pair = pair.Next() {
propSchema, err := cm.convertJSONSchema(pair.Value)
if err != nil {
return nil, err
}
properties[pair.Key] = propSchema
}
result.Properties = properties
}
if schema.Required != nil {
result.Required = schema.Required
}
case openapi3.TypeArray:
result.Type = genai.TypeArray
if schema.Items != nil {
itemSchema, err := cm.convertJSONSchema(schema.Items)
if err != nil {
return nil, err
}
result.Items = itemSchema
}
case openapi3.TypeString:
result.Type = genai.TypeString
if schema.Enum != nil {
enums := make([]string, 0, len(schema.Enum))
for _, e := range schema.Enum {
if str, ok := e.(string); ok {
enums = append(enums, str)
} else {
return nil, fmt.Errorf("enum value must be a string, schema: %+v", schema)
}
}
result.Enum = enums
}
case openapi3.TypeNumber:
result.Type = genai.TypeNumber
case openapi3.TypeInteger:
result.Type = genai.TypeInteger
case openapi3.TypeBoolean:
result.Type = genai.TypeBoolean
default:
result.Type = genai.TypeUnspecified
}
return result, nil
}
func (cm *ChatModel) convertSchemaMessages(messages []*schema.Message) ([]*genai.Content, error) {
var contents []*genai.Content
for _, message := range messages {
content, err := cm.convertSchemaMessage(message)
if err != nil {
return nil, fmt.Errorf("convert schema message failed: %w", err)
}
if content != nil {
contents = append(contents, content)
}
}
return contents, nil
}
func (cm *ChatModel) convertSchemaMessage(message *schema.Message) (*genai.Content, error) {
if message == nil {
return nil, nil
}
var parts []*genai.Part
// Handle tool calls
if message.ToolCalls != nil {
for _, call := range message.ToolCalls {
var args map[string]any
if err := json.Unmarshal([]byte(call.Function.Arguments), &args); err != nil {
return nil, fmt.Errorf("unmarshal tool call arguments failed: %w", err)
}
parts = append(parts, &genai.Part{
FunctionCall: &genai.FunctionCall{
Name: call.Function.Name,
Args: args,
},
})
}
}
// Handle tool responses
if message.Role == schema.Tool {
var response map[string]any
if err := json.Unmarshal([]byte(message.Content), &response); err != nil {
// If the content is not valid JSON, treat it as a plain text error response
response = map[string]any{
"error": message.Content,
}
}
parts = append(parts, &genai.Part{
FunctionResponse: &genai.FunctionResponse{
Name: message.ToolCallID,
Response: response,
},
})
} else {
// Handle text content
if message.Content != "" {
parts = append(parts, &genai.Part{Text: message.Content})
}
// Handle multi-content (images, audio, etc.)
for _, content := range message.MultiContent {
switch content.Type {
case schema.ChatMessagePartTypeText:
parts = append(parts, &genai.Part{Text: content.Text})
case schema.ChatMessagePartTypeImageURL:
if content.ImageURL != nil {
parts = append(parts, &genai.Part{
FileData: &genai.FileData{
MIMEType: content.ImageURL.MIMEType,
FileURI: content.ImageURL.URI,
},
})
}
case schema.ChatMessagePartTypeAudioURL:
if content.AudioURL != nil {
parts = append(parts, &genai.Part{
FileData: &genai.FileData{
MIMEType: content.AudioURL.MIMEType,
FileURI: content.AudioURL.URI,
},
})
}
case schema.ChatMessagePartTypeVideoURL:
if content.VideoURL != nil {
parts = append(parts, &genai.Part{
FileData: &genai.FileData{
MIMEType: content.VideoURL.MIMEType,
FileURI: content.VideoURL.URI,
},
})
}
case schema.ChatMessagePartTypeFileURL:
if content.FileURL != nil {
parts = append(parts, &genai.Part{
FileData: &genai.FileData{
MIMEType: content.FileURL.MIMEType,
FileURI: content.FileURL.URI,
},
})
}
}
}
}
if len(parts) == 0 {
return nil, nil
}
return &genai.Content{
Role: string(cm.convertRole(message.Role)),
Parts: parts,
}, nil
}
func (cm *ChatModel) convertRole(role schema.RoleType) genai.Role {
switch role {
case schema.Assistant:
return genai.RoleModel
case schema.User:
return genai.RoleUser
case schema.Tool:
return genai.RoleUser // Tool responses are treated as user messages in the new API
default:
return genai.RoleUser
}
}
func (cm *ChatModel) convertResponse(resp *genai.GenerateContentResponse) (*schema.Message, error) {
if len(resp.Candidates) == 0 {
return nil, fmt.Errorf("gemini result is empty")
}
candidate := resp.Candidates[0]
message := &schema.Message{
Role: schema.Assistant,
ResponseMeta: &schema.ResponseMeta{
FinishReason: string(candidate.FinishReason),
},
}
// Handle usage metadata
if resp.UsageMetadata != nil {
message.ResponseMeta.Usage = &schema.TokenUsage{
PromptTokens: int(resp.UsageMetadata.PromptTokenCount),
CompletionTokens: int(resp.UsageMetadata.CandidatesTokenCount),
TotalTokens: int(resp.UsageMetadata.TotalTokenCount),
}
}
// Process content parts
var textParts []string
for _, part := range candidate.Content.Parts {
switch {
case part.Text != "":
textParts = append(textParts, part.Text)
case part.FunctionCall != nil:
args, err := json.Marshal(part.FunctionCall.Args)
if err != nil {
return nil, fmt.Errorf("marshal function call arguments failed: %w", err)
}
message.ToolCalls = append(message.ToolCalls, schema.ToolCall{
ID: part.FunctionCall.Name,
Function: schema.FunctionCall{
Name: part.FunctionCall.Name,
Arguments: string(args),
},
})
case part.ExecutableCode != nil:
textParts = append(textParts, part.ExecutableCode.Code)
case part.CodeExecutionResult != nil:
textParts = append(textParts, part.CodeExecutionResult.Output)
}
}
// Set content
if len(textParts) == 1 {
message.Content = textParts[0]
} else if len(textParts) > 1 {
for _, text := range textParts {
message.MultiContent = append(message.MultiContent, schema.ChatMessagePart{
Type: schema.ChatMessagePartTypeText,
Text: text,
})
}
}
return message, nil
}
func (cm *ChatModel) convertCallbackOutput(message *schema.Message, conf *model.Config) *model.CallbackOutput {
callbackOutput := &model.CallbackOutput{
Message: message,
Config: conf,
}
if message.ResponseMeta != nil && message.ResponseMeta.Usage != nil {
callbackOutput.TokenUsage = &model.TokenUsage{
PromptTokens: message.ResponseMeta.Usage.PromptTokens,
CompletionTokens: message.ResponseMeta.Usage.CompletionTokens,
TotalTokens: message.ResponseMeta.Usage.TotalTokens,
}
}
return callbackOutput
}
// IsCallbacksEnabled indicates whether this model supports callbacks.
// For the Gemini model, callbacks are always enabled to support
// token usage tracking and other monitoring features.
//
// Returns:
// - bool: Always returns true for Gemini models
func (cm *ChatModel) IsCallbacksEnabled() bool {
return true
}
const typ = "Gemini"
// GetType returns the type identifier for this model.
// This is used for logging and debugging purposes to identify
// which model implementation is being used.
//
// Returns:
// - string: Returns "Gemini" as the model type
func (cm *ChatModel) GetType() string {
return typ
}
type panicErr struct {
info any
stack []byte
}
func (p *panicErr) Error() string {
return fmt.Sprintf("panic error: %v, \nstack: %s", p.info, string(p.stack))
}
func newPanicErr(info any, stack []byte) error {
return &panicErr{
info: info,
stack: stack,
}
}