Skip to content

Commit

Permalink
Merge pull request #88 from milbk/main
Browse files Browse the repository at this point in the history
 Update RequestOptions
  • Loading branch information
awaescher authored Sep 26, 2024
2 parents d0bc647 + ebe5bb3 commit 331d4d4
Showing 1 changed file with 118 additions and 0 deletions.
118 changes: 118 additions & 0 deletions src/Models/RequestOptions.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using System.Buffers.Text;
using System.Text.Json.Serialization;

namespace OllamaSharp.Models;
Expand Down Expand Up @@ -58,6 +59,24 @@ public class RequestOptions
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? NumGpu { get; set; }

/// <summary>
/// Controls which GPU is used for small tensors, for which the overhead of
/// splitting the computation across all GPUs is not worthwhile. The selected
/// GPU uses slightly more VRAM to store a scratch buffer for temporary
/// results. (Default: GPU 0)
/// </summary>
[JsonPropertyName("main_gpu")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? MainGpu { get; set; }

/// <summary>
/// Maximum number of prompt tokens processed together in a single batch
/// during prompt evaluation. (Default: 512)
/// </summary>
[JsonPropertyName("num_batch")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? NumBatch { get; set; }

/// <summary>
/// Sets the number of threads to use during computation. By default,
/// Ollama will detect this for optimal performance.
Expand All @@ -68,6 +87,14 @@ public class RequestOptions
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? NumThread { get; set; }

/// <summary>
/// Number of tokens from the initial prompt to retain when the context
/// window overflows and older tokens are discarded.
/// (Default: 4, -1 = keep all)
/// </summary>
[JsonPropertyName("num_keep")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public int? NumKeep { get; set; }

/// <summary>
/// Sets how far back for the model to look back to prevent repetition.
/// (Default: 64, 0 = disabled, -1 = num_ctx)
Expand All @@ -85,6 +112,22 @@ public class RequestOptions
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public float? RepeatPenalty { get; set; }

/// <summary>
/// Penalty applied to tokens that are already present in the prompt,
/// regardless of how often they appear. Positive values discourage the
/// model from repeating them. (Default: 0.0)
/// </summary>
[JsonPropertyName("presence_penalty")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public float? PresencePenalty { get; set; }

/// <summary>
/// Penalty applied to tokens in proportion to how frequently they already
/// appear in the prompt. Positive values discourage the model from
/// repeating them. (Default: 0.0)
/// </summary>
[JsonPropertyName("frequency_penalty")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public float? FrequencyPenalty { get; set; }

/// <summary>
/// The temperature of the model. Increasing the temperature will make the
/// model answer more creatively. (Default: 0.8)
Expand Down Expand Up @@ -155,4 +198,79 @@ public class RequestOptions
[JsonPropertyName("min_p")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public float? MinP { get; set; }

/// <summary>
/// The typical-p value to use for sampling. Implements Locally Typical
/// Sampling as described in <see href="https://arxiv.org/abs/2202.00666"/>.
/// A value of 1.0 disables this sampler. (Default: 1.0)
/// </summary>
[JsonPropertyName("typical_p")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public float? TypicalP { get; set; }

/// <summary>
/// Whether newline tokens are included when applying repetition penalties.
/// (Default: True)
/// </summary>
[JsonPropertyName("penalize_newline")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? PenalizeNewline { get; set; }

/// <summary>
/// Whether the model is mapped into memory (mmap), which lets the system
/// load only the parts that are actually needed. Disabling mmap makes
/// loading slower but reduces pageouts when mlock is not used; if the model
/// is larger than the available RAM, disabling mmap prevents it from
/// loading at all.
/// (Default: True)
/// </summary>
[JsonPropertyName("use_mmap")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? UseMmap { get; set; }

/// <summary>
/// Lock the model in memory to prevent it from being swapped out. This can
/// improve performance, at the cost of using more RAM and slowing down
/// loading.
/// (Default: False)
/// </summary>
[JsonPropertyName("use_mlock")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? UseMlock { get; set; }

/// <summary>
/// Enable low-VRAM mode, reducing GPU memory usage.
/// (Default: False)
/// </summary>
[JsonPropertyName("low_vram")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? LowVRAM { get; set; }

/// <summary>
/// Use 16-bit floating point (f16) for the key/value cache.
/// (Default: False)
/// </summary>
/// <remarks>
/// NOTE(review): llama.cpp has historically defaulted f16_kv to true —
/// confirm the stated default against the Ollama server before relying on it.
/// </remarks>
[JsonPropertyName("f16_kv")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? F16Kv { get; set; }

/// <summary>
/// Return logits for all tokens in the prompt, not just the last one.
/// (Default: False)
/// </summary>
[JsonPropertyName("logits_all")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? LogitsAll { get; set; }

/// <summary>
/// Load only the model's vocabulary, not its weights.
/// (Default: False)
/// </summary>
[JsonPropertyName("vocab_only")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? VocabOnly { get; set; }

/// <summary>
/// Enable NUMA (non-uniform memory access) support.
/// (Default: False)
/// </summary>
[JsonPropertyName("numa")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public bool? Numa { get; set; }
}

0 comments on commit 331d4d4

Please sign in to comment.