Skip to content

Commit

Permalink
Merge pull request #88 from milbk/main
Browse files Browse the repository at this point in the history
 Update RequestOptions
  • Loading branch information
awaescher authored Sep 26, 2024
2 parents d0bc647 + ebe5bb3 commit 331d4d4
Showing 1 changed file with 118 additions and 0 deletions.
118 changes: 118 additions & 0 deletions src/Models/RequestOptions.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using System.Buffers.Text;
using System.Text.Json.Serialization;

namespace OllamaSharp.Models;
Expand Down Expand Up @@ -58,6 +59,24 @@ public class RequestOptions
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? NumGpu { get; set; }

/// <summary>
/// Controls which GPU is used for small tensors, for which the overhead of
/// splitting the computation across all GPUs is not worthwhile. The selected
/// GPU uses slightly more VRAM to store a scratch buffer for temporary
/// results. (Default: GPU 0)
/// </summary>
[JsonPropertyName("main_gpu")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? MainGpu { get; set; }

/// <summary>
/// Maximum number of prompt tokens processed together in a single batch
/// during prompt evaluation. (Default: 512)
/// </summary>
[JsonPropertyName("num_batch")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? NumBatch { get; set; }

/// <summary>
/// Sets the number of threads to use during computation. By default,
/// Ollama will detect this for optimal performance.
Expand All @@ -68,6 +87,14 @@ public class RequestOptions
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? NumThread { get; set; }

/// <summary>
/// Number of tokens from the initial prompt to retain when the context
/// window overflows and older tokens are discarded.
/// (Default: 4, -1 = keep all)
/// </summary>
[JsonPropertyName("num_keep")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? NumKeep { get; set; }

/// <summary>
/// Sets how far back for the model to look back to prevent repetition.
/// (Default: 64, 0 = disabled, -1 = num_ctx)
Expand All @@ -85,6 +112,22 @@ public class RequestOptions
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public float? RepeatPenalty { get; set; }

/// <summary>
/// Penalty applied to tokens that are already present in the prompt,
/// regardless of how often they appear. Positive values discourage the
/// model from repeating them. (Default: 0.0)
/// </summary>
[JsonPropertyName("presence_penalty")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public float? PresencePenalty { get; set; }

/// <summary>
/// Penalty applied to tokens in proportion to how frequently they already
/// appear in the prompt. Positive values discourage the model from
/// repeating them. (Default: 0.0)
/// </summary>
[JsonPropertyName("frequency_penalty")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public float? FrequencyPenalty { get; set; }

/// <summary>
/// The temperature of the model. Increasing the temperature will make the
/// model answer more creatively. (Default: 0.8)
Expand Down Expand Up @@ -155,4 +198,79 @@ public class RequestOptions
[JsonPropertyName("min_p")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public float? MinP { get; set; }

/// <summary>
/// The typical-p value to use for sampling. Implements Locally Typical
/// Sampling as described in <see href="https://arxiv.org/abs/2202.00666"/>.
/// A value of 1.0 disables this sampler. (Default: 1.0)
/// </summary>
[JsonPropertyName("typical_p")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public float? TypicalP { get; set; }

/// <summary>
/// Whether newline tokens are included when applying repetition penalties.
/// (Default: True)
/// </summary>
[JsonPropertyName("penalize_newline")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? PenalizeNewline { get; set; }

/// <summary>
/// Whether the model is mapped into memory (mmap), which lets the system
/// load only the parts that are actually needed. Disabling mmap makes
/// loading slower but reduces pageouts when mlock is not used; if the model
/// is larger than the available RAM, disabling mmap prevents it from
/// loading at all.
/// (Default: True)
/// </summary>
[JsonPropertyName("use_mmap")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? UseMmap { get; set; }

/// <summary>
/// Lock the model in memory to prevent it from being swapped out. This can
/// improve performance, at the cost of using more RAM and slowing down
/// loading.
/// (Default: False)
/// </summary>
[JsonPropertyName("use_mlock")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? UseMlock { get; set; }

/// <summary>
/// Enable low-VRAM mode, reducing GPU memory usage.
/// (Default: False)
/// </summary>
[JsonPropertyName("low_vram")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? LowVRAM { get; set; }

/// <summary>
/// Use 16-bit floating point (f16) for the key/value cache.
/// (Default: False)
/// </summary>
/// <remarks>
/// NOTE(review): llama.cpp has historically defaulted f16_kv to true —
/// confirm the stated default against the Ollama server before relying on it.
/// </remarks>
[JsonPropertyName("f16_kv")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? F16Kv { get; set; }

/// <summary>
/// Return logits for all tokens in the prompt, not just the last one.
/// (Default: False)
/// </summary>
[JsonPropertyName("logits_all")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? LogitsAll { get; set; }

/// <summary>
/// Load only the model's vocabulary, not its weights.
/// (Default: False)
/// </summary>
[JsonPropertyName("vocab_only")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? VocabOnly { get; set; }

/// <summary>
/// Enable NUMA (non-uniform memory access) support.
/// (Default: False)
/// </summary>
[JsonPropertyName("numa")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? Numa { get; set; }
}

0 comments on commit 331d4d4

Please sign in to comment.