Commit

Expose flash attention
brittlewis12 committed Sep 25, 2024
1 parent 218ae67 commit 25e87b6
Showing 1 changed file with 30 additions and 0 deletions.

llama-cpp-2/src/context/params.rs
@@ -197,6 +197,36 @@ impl LlamaContextParams {
        self.context_params.n_ubatch
    }

    /// Set the `flash_attention` parameter
    ///
    /// # Examples
    ///
    /// ```rust
    /// use llama_cpp_2::context::params::LlamaContextParams;
    /// let params = LlamaContextParams::default()
    ///     .with_flash_attention(true);
    /// assert_eq!(params.flash_attention(), true);
    /// ```
    #[must_use]
    pub fn with_flash_attention(mut self, enabled: bool) -> Self {
        self.context_params.flash_attn = enabled;
        self
    }

    /// Get the `flash_attention` parameter
    ///
    /// # Examples
    ///
    /// ```rust
    /// use llama_cpp_2::context::params::LlamaContextParams;
    /// let params = LlamaContextParams::default();
    /// assert_eq!(params.flash_attention(), false);
    /// ```
    #[must_use]
    pub fn flash_attention(&self) -> bool {
        self.context_params.flash_attn
    }

    /// Set the type of rope scaling.
    ///
    /// # Examples
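For context, here is a minimal sketch of how the new setter composes with the crate's existing `LlamaContextParams` builders. The `with_n_ctx` call and the 4096-token context size are illustrative assumptions, not part of this commit:

use std::num::NonZeroU32;
use llama_cpp_2::context::params::LlamaContextParams;

fn main() {
    // Enable flash attention alongside other context settings;
    // the context size of 4096 is an arbitrary example value.
    let params = LlamaContextParams::default()
        .with_n_ctx(NonZeroU32::new(4096))
        .with_flash_attention(true);
    assert!(params.flash_attention());
}

Because `with_flash_attention` takes and returns `Self`, it slots into the same chained-builder style as the struct's other `with_*` methods.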
