Skip to content

Commit

Permalink
Touchups for PR tool calling
Browse files Browse the repository at this point in the history
* branch naming convention
* hallucination minimization
  • Loading branch information
michaeljguarino committed Dec 4, 2024
1 parent d05fc9f commit af09403
Show file tree
Hide file tree
Showing 21 changed files with 349 additions and 53 deletions.
25 changes: 25 additions & 0 deletions assets/src/generated/graphql.ts
Original file line number Diff line number Diff line change
Expand Up @@ -330,11 +330,15 @@ export type AnthropicSettings = {
__typename?: 'AnthropicSettings';
/** the anthropic model version to use */
model?: Maybe<Scalars['String']['output']>;
/** the model to use for tool calls, which are less frequent and require more complex reasoning */
toolModel?: Maybe<Scalars['String']['output']>;
};

/** Input attributes for configuring the Anthropic AI provider */
export type AnthropicSettingsAttributes = {
  /** the Anthropic model version to use for ordinary completions */
  model?: InputMaybe<Scalars['String']['input']>;
  /** model dedicated to tool calls — an infrequent but reasoning-heavy workload */
  toolModel?: InputMaybe<Scalars['String']['input']>;
  /** API access token used to authenticate against Anthropic */
  accessToken?: InputMaybe<Scalars['String']['input']>;
};

/** a representation of a kubernetes api deprecation */
Expand Down Expand Up @@ -635,6 +639,8 @@ export type AzureOpenaiAttributes = {
endpoint: Scalars['String']['input'];
/** the exact model you wish to use */
model?: InputMaybe<Scalars['String']['input']>;
/** the model to use for tool calls, which are less frequent and require more complex reasoning */
toolModel?: InputMaybe<Scalars['String']['input']>;
};

/** Settings for configuring against Azure OpenAI */
Expand All @@ -644,6 +650,9 @@ export type AzureOpenaiSettings = {
apiVersion?: Maybe<Scalars['String']['output']>;
/** the endpoint of your azure openai version, should look like: https://{endpoint}/openai/deployments/{deployment-id} */
endpoint: Scalars['String']['output'];
model?: Maybe<Scalars['String']['output']>;
/** the model to use for tool calls, which are less frequent and require more complex reasoning */
toolModel?: Maybe<Scalars['String']['output']>;
};

export type AzureSettingsAttributes = {
Expand Down Expand Up @@ -689,6 +698,8 @@ export type BedrockAiAttributes = {
modelId: Scalars['String']['input'];
/** aws secret access key to use, you can also use IRSA for self-hosted consoles */
secretAccessKey?: InputMaybe<Scalars['String']['input']>;
/** the model to use for tool calls, which are less frequent and require more complex reasoning */
toolModelId?: InputMaybe<Scalars['String']['input']>;
};

/** Settings for usage of AWS Bedrock for LLMs */
Expand All @@ -698,6 +709,8 @@ export type BedrockAiSettings = {
accessKeyId?: Maybe<Scalars['String']['output']>;
/** the bedrock model to use */
modelId: Scalars['String']['output'];
/** the model to use for tool calls, which are less frequent and require more complex reasoning */
toolModelId?: Maybe<Scalars['String']['output']>;
};

export type BindingAttributes = {
Expand Down Expand Up @@ -3879,13 +3892,17 @@ export type OllamaAttributes = {
/** An http authorization header to use on calls to the Ollama api */
authorization?: InputMaybe<Scalars['String']['input']>;
model: Scalars['String']['input'];
/** the model to use for tool calls, which are less frequent and require more complex reasoning */
toolModel?: InputMaybe<Scalars['String']['input']>;
url: Scalars['String']['input'];
};

/** Configuration for a self-hosted Ollama LLM deployment */
export type OllamaSettings = {
  __typename?: 'OllamaSettings';
  /** base url your ollama deployment is reachable on */
  url: Scalars['String']['output'];
  /** default model served by this deployment */
  model: Scalars['String']['output'];
  /** optional model reserved for tool calls, which are less frequent and require more complex reasoning */
  toolModel?: Maybe<Scalars['String']['output']>;
};
Expand All @@ -3897,12 +3914,16 @@ export type OpenaiSettings = {
baseUrl?: Maybe<Scalars['String']['output']>;
/** the openai model version to use */
model?: Maybe<Scalars['String']['output']>;
/** the model to use for tool calls, which are less frequent and require more complex reasoning */
toolModel?: Maybe<Scalars['String']['output']>;
};

/** Input attributes for configuring the OpenAI AI provider */
export type OpenaiSettingsAttributes = {
  /** the OpenAI model version to use for ordinary completions */
  model?: InputMaybe<Scalars['String']['input']>;
  /** model dedicated to tool calls — an infrequent but reasoning-heavy workload */
  toolModel?: InputMaybe<Scalars['String']['input']>;
  /** custom base url, for API-compatible reimplementations of the OpenAI spec */
  baseUrl?: InputMaybe<Scalars['String']['input']>;
  /** API access token used to authenticate against OpenAI */
  accessToken?: InputMaybe<Scalars['String']['input']>;
};

export enum Operation {
Expand Down Expand Up @@ -9364,6 +9385,8 @@ export type VertexAiAttributes = {
project: Scalars['String']['input'];
/** optional service account json to auth to the GCP vertex apis */
serviceAccountJson?: InputMaybe<Scalars['String']['input']>;
/** the model to use for tool calls, which are less frequent and require more complex reasoning */
toolModel?: InputMaybe<Scalars['String']['input']>;
};

/** Settings for usage of GCP VertexAI for LLMs */
Expand All @@ -9375,6 +9398,8 @@ export type VertexAiSettings = {
model?: Maybe<Scalars['String']['output']>;
/** the gcp project id to use */
project: Scalars['String']['output'];
/** the model to use for tool calls, which are less frequent and require more complex reasoning */
toolModel?: Maybe<Scalars['String']['output']>;
};

export type VerticalPodAutoscaler = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ spec:
- key
type: object
x-kubernetes-map-type: atomic
toolModel:
description: Model to use for tool calling, which is less
frequent and often requires more advanced reasoning
type: string
required:
- tokenSecretRef
type: object
Expand Down Expand Up @@ -122,6 +126,10 @@ spec:
- key
type: object
x-kubernetes-map-type: atomic
toolModel:
description: Model to use for tool calling, which is less
frequent and often requires more advanced reasoning
type: string
required:
- endpoint
- tokenSecretRef
Expand Down Expand Up @@ -158,6 +166,10 @@ spec:
- key
type: object
x-kubernetes-map-type: atomic
toolModelId:
description: Model to use for tool calling, which is less
frequent and often requires more advanced reasoning
type: string
required:
- modelId
type: object
Expand Down Expand Up @@ -196,6 +208,10 @@ spec:
- key
type: object
x-kubernetes-map-type: atomic
toolModel:
description: Model to use for tool calling, which is less
frequent and often requires more advanced reasoning
type: string
url:
description: URL is the url this model is queryable on
type: string
Expand Down Expand Up @@ -236,6 +252,10 @@ spec:
- key
type: object
x-kubernetes-map-type: atomic
toolModel:
description: Model to use for tool calling, which is less
frequent and often requires more advanced reasoning
type: string
required:
- tokenSecretRef
type: object
Expand Down Expand Up @@ -288,6 +308,10 @@ spec:
- key
type: object
x-kubernetes-map-type: atomic
toolModel:
description: Model to use for tool calling, which is less
frequent and often requires more advanced reasoning
type: string
required:
- location
- project
Expand Down
29 changes: 27 additions & 2 deletions go/client/models_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions go/controller/api/v1alpha1/deploymentsettings_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ func (in *AISettings) Attributes(ctx context.Context, c client.Client, namespace
AccessToken: &token,
Model: in.OpenAI.Model,
BaseURL: in.OpenAI.BaseUrl,
ToolModel: in.OpenAI.ToolModel,
}
case console.AiProviderAnthropic:
if in.Anthropic == nil {
Expand All @@ -249,6 +250,7 @@ func (in *AISettings) Attributes(ctx context.Context, c client.Client, namespace
attr.Anthropic = &console.AnthropicSettingsAttributes{
AccessToken: lo.ToPtr(token),
Model: in.Anthropic.Model,
ToolModel: in.Anthropic.ToolModel,
}
case console.AiProviderAzure:
if in.Azure == nil {
Expand All @@ -264,6 +266,7 @@ func (in *AISettings) Attributes(ctx context.Context, c client.Client, namespace
Endpoint: in.Azure.Endpoint,
APIVersion: in.Azure.ApiVersion,
Model: in.Azure.Model,
ToolModel: in.Azure.ToolModel,
AccessToken: token,
}
case console.AiProviderVertex:
Expand Down Expand Up @@ -295,6 +298,7 @@ func (in *AISettings) Attributes(ctx context.Context, c client.Client, namespace

attr.Bedrock = &console.BedrockAiAttributes{
ModelID: in.Bedrock.ModelID,
ToolModelID: in.Bedrock.ToolModelId,
AccessKeyID: in.Bedrock.AccessKeyId,
SecretAccessKey: secret,
}
Expand All @@ -311,6 +315,7 @@ func (in *AISettings) Attributes(ctx context.Context, c client.Client, namespace
attr.Ollama = &console.OllamaAttributes{
URL: in.Ollama.URL,
Model: in.Ollama.Model,
ToolModel: in.Ollama.ToolModel,
Authorization: auth,
}
}
Expand All @@ -324,6 +329,11 @@ type AIProviderSettings struct {
// +kubebuilder:validation:Optional
Model *string `json:"model,omitempty"`

// Model to use for tool calling, which is less frequent and often requires more advanced reasoning
//
// +kubebuilder:validation:Optional
ToolModel *string `json:"toolModel,omitempty"`

// A custom base url to use, for reimplementations of the same API scheme (for instance Together.ai uses the OpenAI API spec)
//
// +kubebuilder:validation:Optional
Expand All @@ -348,6 +358,11 @@ type OllamaSettings struct {
// +kubebuilder:validation:Required
Model string `json:"model"`

// Model to use for tool calling, which is less frequent and often requires more advanced reasoning
//
// +kubebuilder:validation:Optional
ToolModel *string `json:"toolModel,omitempty"`

// TokenSecretRef is a reference to the local secret holding the contents of a HTTP Authorization header
// to send to your ollama api in case authorization is required (eg for an instance hosted on a public network)
//
Expand All @@ -371,6 +386,11 @@ type AzureOpenAISettings struct {
// +kubebuilder:validation:Optional
Model *string `json:"model,omitempty"`

// Model to use for tool calling, which is less frequent and often requires more advanced reasoning
//
// +kubebuilder:validation:Optional
ToolModel *string `json:"toolModel,omitempty"`

// TokenSecretRef is a reference to the local secret holding the token to access
// the configured AI provider.
//
Expand All @@ -384,6 +404,11 @@ type BedrockSettings struct {
// +kubebuilder:validation:Required
ModelID string `json:"modelId"`

// Model to use for tool calling, which is less frequent and often requires more advanced reasoning
//
// +kubebuilder:validation:Optional
ToolModelId *string `json:"toolModelId,omitempty"`

// An AWS Access Key ID to use, can also use IRSA to acquire credentials
//
// +kubebuilder:validation:Optional
Expand All @@ -401,6 +426,11 @@ type VertexSettings struct {
// +kubebuilder:validation:Optional
Model *string `json:"model,omitempty"`

// Model to use for tool calling, which is less frequent and often requires more advanced reasoning
//
// +kubebuilder:validation:Optional
ToolModel *string `json:"toolModel,omitempty"`

// The GCP project you'll be using
//
// +kubebuilder:validation:Required
Expand Down
Loading

0 comments on commit af09403

Please sign in to comment.