Skip to content

Commit

Permalink
Expose offload_kqv to control GPU KV cache & KQV ops
Browse files Browse the repository at this point in the history
  • Loading branch information
brittlewis12 committed Sep 25, 2024
1 parent 25e87b6 commit 3fc30eb
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions llama-cpp-2/src/context/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,36 @@ impl LlamaContextParams {
self.context_params.flash_attn
}

/// Builder-style setter for the `offload_kqv` flag, which controls whether the
/// KV cache and KQV ops are offloaded to the GPU.
///
/// Consumes `self`, stores `enabled` in the underlying context parameters and
/// hands the updated value back so calls can be chained.
///
/// # Examples
///
/// ```rust
/// use llama_cpp_2::context::params::LlamaContextParams;
/// let params = LlamaContextParams::default()
///     .with_offload_kqv(false);
/// assert!(!params.offload_kqv());
/// ```
#[must_use]
pub fn with_offload_kqv(mut self, enabled: bool) -> Self {
    let ctx = &mut self.context_params;
    ctx.offload_kqv = enabled;
    self
}

/// Report the current value of the `offload_kqv` flag.
///
/// Returns whatever is stored in the underlying context parameters; use
/// `with_offload_kqv` to change it.
///
/// # Examples
///
/// ```rust
/// use llama_cpp_2::context::params::LlamaContextParams;
/// let params = LlamaContextParams::default();
/// assert!(params.offload_kqv());
/// ```
#[must_use]
pub fn offload_kqv(&self) -> bool {
    let ctx = &self.context_params;
    ctx.offload_kqv
}

/// Set the type of rope scaling.
///
/// # Examples
Expand Down

0 comments on commit 3fc30eb

Please sign in to comment.