No ML experience required!

```rust
// Create a model from anything that implements `AsRef<Path>`:
let model = LlamaModel::load_from_file("path_to_model.gguf", LlamaParams::default()).expect("Could not load model");

// A `LlamaModel` holds the weights shared across many _sessions_; while your model may be
// several gigabytes large, a session is typically a few dozen to a hundred megabytes!
let mut ctx = model.create_session(SessionParams::default()).expect("Failed to create session");

// You can feed anything that implements `AsRef<[u8]>` into the model's context.
ctx.advance_context("This is the story of a man named Stanley.").unwrap();
// Track how many tokens have been generated; `max_tokens` bounds the loop below and
// mirrors the limit passed to `start_completing_with`.
let max_tokens = 1024;
let mut decoded_tokens = 0;

// `ctx.start_completing_with` creates a worker thread that generates tokens. When the completion
// handle is dropped, tokens stop generating!
let mut completions = ctx.start_completing_with(StandardSampler::default(), 1024).into_strings();

for completion in completions {
print!("{completion}");
let _ = io::stdout().flush();
print!("{completion}");
let _ = io::stdout().flush();

decoded_tokens += 1;
decoded_tokens += 1;

if decoded_tokens > max_tokens {
break;
}
if decoded_tokens > max_tokens {
break;
}
}
```
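
For reference, the example above assumes imports along the following lines. The exact module paths are an assumption and may differ between versions of the crate:

```rust
// Assumed imports for the example above; paths are illustrative, not authoritative.
use std::io::{self, Write};

use llama_cpp::standard_sampler::StandardSampler;
use llama_cpp::{LlamaModel, LlamaParams, SessionParams};
```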

Low-level bindings to llama.cpp's C API are provided in `crates/llama_cpp_sys`. Contributions are welcome!

## Building

Keep in mind that [llama.cpp](https://github.com/ggerganov/llama.cpp) is very computationally heavy, meaning standard
debug builds (running just `cargo build`/`cargo run`) will suffer greatly from the lack of optimisations. Therefore,
unless debugging is really necessary, it is highly recommended to build and run using Cargo's `--release` flag.
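
In practice this just means adding `--release` to the usual Cargo invocations, for example in a project that consumes these bindings:

```sh
# Debug builds of llama.cpp are too slow for practical use; prefer release builds.
cargo build --release
cargo run --release
```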

### Cargo Features

Several of [llama.cpp](https://github.com/ggerganov/llama.cpp)'s backends are supported through Cargo features (see the example after this list):

- `cuda` - Enables the CUDA backend; the CUDA Toolkit is required at compile time.
- `vulkan` - Enables the Vulkan backend; the Vulkan SDK is required at compile time.
- `metal` - Enables the Metal backend (macOS only).
- `hipblas` - Enables the hipBLAS/ROCm backend; ROCm is required at compile time.
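
For example, to compile this repository with the CUDA backend enabled (assuming the CUDA Toolkit is installed), the feature can be passed straight to Cargo:

```sh
# Enable the CUDA backend; swap `cuda` for `vulkan`, `metal`, or `hipblas` as needed.
cargo build --release --features cuda
```

A downstream crate can likewise enable the same feature on its dependency entry for this crate.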

## Experimental

These bindings also provide the ability to predict a context's size in memory before it is created. This is a highly
experimental feature, as it isn't something [llama.cpp](https://github.com/ggerganov/llama.cpp) itself provides.
The returned values may be highly inaccurate, although an attempt is made to never return a value lower than the real
size.
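
As a rough sketch only: the snippet below uses `estimate_session_size` as a hypothetical method name to illustrate the idea; it is not confirmed to be the crate's actual API.

```rust
// Hypothetical sketch: `estimate_session_size` is an assumed name, not a confirmed API.
// The intent is to ask how much memory a session with these parameters would need
// before actually allocating it; the estimate aims to be an upper bound.
let params = SessionParams::default();
let estimated_bytes = model.estimate_session_size(&params);
println!("predicted session size: at most ~{estimated_bytes} bytes");
```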

## License

MIT or Apache-2.0, at your option (the "Rust" license). See `LICENSE-MIT` and `LICENSE-APACHE`.
