Merge pull request #488 from thewh1teagle/thewh1teagle/patch-1
Thewh1teagle/patch 1
MarcusDunn authored Aug 31, 2024
2 parents 951afc3 + 5da6a85 commit 301c4e2
Showing 10 changed files with 58 additions and 68 deletions.
9 changes: 1 addition & 8 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
 [workspace]
 resolver = "2"
-members = ["llama-cpp-sys-2", "llama-cpp-2", "embeddings", "examples/usage", "examples/simple"]
+members = ["llama-cpp-sys-2", "llama-cpp-2", "embeddings", "examples/simple"]

 [workspace.dependencies]
 # core library deps
20 changes: 10 additions & 10 deletions examples/usage/src/main.rs → examples/usage.rs
@@ -1,29 +1,30 @@
 //! # Usage
-//!
+//!
 //! This is just about the smallest possible way to do inference. To fetch a model from hugging face:
-//!
-//! ```bash
+//!
+//! ```console
 //! git clone --recursive https://github.com/utilityai/llama-cpp-rs
 //! cd llama-cpp-rs/examples/usage
 //! wget https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-GGUF/resolve/main/qwen2-1_5b-instruct-q4_0.gguf
-//! cargo run --bin usage -- qwen2-1_5b-instruct-q4_0.gguf
+//! cargo run --example usage -- qwen2-1_5b-instruct-q4_0.gguf
 //! ```
-use std::io::Write;
 use llama_cpp_2::context::params::LlamaContextParams;
 use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::llama_batch::LlamaBatch;
 use llama_cpp_2::model::params::LlamaModelParams;
 use llama_cpp_2::model::LlamaModel;
 use llama_cpp_2::model::{AddBos, Special};
 use llama_cpp_2::token::data_array::LlamaTokenDataArray;
+use std::io::Write;

 #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
 fn main() {
     let model_path = std::env::args().nth(1).expect("Please specify model path");
     let backend = LlamaBackend::init().unwrap();
     let params = LlamaModelParams::default();

-    let prompt = "<|im_start|>user\nHello! how are you?<|im_end|>\n<|im_start|>assistant\n".to_string();
+    let prompt =
+        "<|im_start|>user\nHello! how are you?<|im_end|>\n<|im_start|>assistant\n".to_string();
     LlamaContextParams::default();
     let model =
         LlamaModel::load_from_file(&backend, model_path, &params).expect("unable to load model");
@@ -48,14 +49,11 @@ fn main() {
     }
     ctx.decode(&mut batch).expect("llama_decode() failed");

-
     let mut n_cur = batch.n_tokens();

-
     // The `Decoder`
     let mut decoder = encoding_rs::UTF_8.new_decoder();

-
     while n_cur <= n_len {
         // sample the next token
         {
@@ -72,7 +70,9 @@ fn main() {
             break;
         }

-        let output_bytes = model.token_to_bytes(new_token_id, Special::Tokenize).unwrap();
+        let output_bytes = model
+            .token_to_bytes(new_token_id, Special::Tokenize)
+            .unwrap();
         // use `Decoder.decode_to_string()` to avoid the intermediate buffer
         let mut output_string = String::with_capacity(32);
         let _decode_result = decoder.decode_to_string(&output_bytes, &mut output_string, false);
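
Note: the loop decodes token bytes through a stateful `encoding_rs` decoder (now a dev-dependency, see below) rather than `String::from_utf8`, because a token boundary can fall in the middle of a multi-byte UTF-8 sequence. A minimal sketch of that pattern, independent of llama.cpp (the split byte pair is illustrative):

```rust
fn main() {
    let mut decoder = encoding_rs::UTF_8.new_decoder();
    let mut out = String::with_capacity(32);
    // "é" is 0xC3 0xA9 in UTF-8; feed it split across two chunks,
    // as can happen when a character straddles two tokens.
    let _ = decoder.decode_to_string(&[0xC3], &mut out, false);
    let _ = decoder.decode_to_string(&[0xA9], &mut out, false);
    assert_eq!(out, "é"); // the decoder buffered the partial sequence between calls
}
```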
19 changes: 0 additions & 19 deletions examples/usage/Cargo.toml

This file was deleted.

7 changes: 7 additions & 0 deletions llama-cpp-2/Cargo.toml
@@ -14,6 +14,9 @@
 llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.69" }
 thiserror = { workspace = true }
 tracing = { workspace = true }

+[dev-dependencies]
+encoding_rs = { workspace = true }
+
 [features]
 cuda = ["llama-cpp-sys-2/cuda"]
 metal = ["llama-cpp-sys-2/metal"]
@@ -32,3 +35,7 @@ workspace = true

 [package.metadata.docs.rs]
 features = ["sampler"]
+
+[[example]]
+name = "usage"
+path = "../examples/usage.rs"
2 changes: 1 addition & 1 deletion llama-cpp-2/src/context/sample/sampler.rs
@@ -3,7 +3,7 @@
 //! like [`crate::context::LlamaContext`] or token history to the sampler.
 //!
 //! # Example
-//!
+//!
 //! **Llama.cpp default sampler**
 //!
 //! ```rust
8 changes: 6 additions & 2 deletions llama-cpp-2/src/model.rs
@@ -1,6 +1,6 @@
 //! A safe wrapper around `llama_model`.
-use std::ffi::CString;
 use std::ffi::CStr;
+use std::ffi::CString;
 use std::num::NonZeroU16;
 use std::os::raw::c_int;
 use std::path::Path;
@@ -550,7 +550,11 @@ impl LlamaModel {
            if res > buff.len() as i32 {
                return Err(ApplyChatTemplateError::BuffSizeError);
            }
-            Ok::<String, ApplyChatTemplateError>(CStr::from_ptr(buff.as_mut_ptr()).to_string_lossy().to_string())
+            Ok::<String, ApplyChatTemplateError>(
+                CStr::from_ptr(buff.as_mut_ptr())
+                    .to_string_lossy()
+                    .to_string(),
+            )
         }?;
         Ok(formatted_chat)
     }
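
The reformatted expression is the usual pattern for reading a C string out of a buffer that llama.cpp has filled: `CStr::from_ptr` walks to the NUL terminator, and `to_string_lossy` substitutes replacement characters for any invalid UTF-8 rather than failing. A standalone sketch with a hand-built buffer (contents illustrative):

```rust
use std::ffi::CStr;
use std::os::raw::c_char;

fn main() {
    // Pretend a C API wrote "hi\0" into this buffer.
    let buff: [c_char; 8] = [b'h' as c_char, b'i' as c_char, 0, 0, 0, 0, 0, 0];
    // SAFETY: the buffer is NUL-terminated, so from_ptr stays in bounds.
    let s = unsafe { CStr::from_ptr(buff.as_ptr()) }
        .to_string_lossy()
        .to_string();
    assert_eq!(s, "hi");
}
```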
8 changes: 2 additions & 6 deletions llama-cpp-2/src/model/params/kv_overrides.rs
@@ -33,17 +33,13 @@ impl ParamOverrideValue {
                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_bool: *value }
            }
            ParamOverrideValue::Float(value) => {
-                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 {
-                    val_f64: *value,
-                }
+                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_f64: *value }
            }
            ParamOverrideValue::Int(value) => {
                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_i64: *value }
            }
            ParamOverrideValue::Str(c_string) => {
-                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 {
-                    val_str: *c_string,
-                }
+                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_str: *c_string }
            }
        }
    }
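
Both arms now fit rustfmt's single-line initializer form. `llama_model_kv_override__bindgen_ty_1` is a bindgen-generated C union, so each match arm writes exactly one of its fields; a self-contained sketch of the same idea (this union is a stand-in, not the generated type):

```rust
/// Illustrative stand-in for a bindgen-generated C union.
#[repr(C)]
union OverrideValue {
    val_i64: i64,
    val_f64: f64,
    val_bool: bool,
}

fn main() {
    let v = OverrideValue { val_f64: 0.5 };
    // SAFETY: reading the same field that was just written.
    assert!((unsafe { v.val_f64 } - 0.5).abs() < f64::EPSILON);
}
```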
2 changes: 1 addition & 1 deletion llama-cpp-2/src/token_type.rs
@@ -42,7 +42,7 @@ impl TryFrom<llama_cpp_sys_2::llama_token_type> for LlamaTokenAttrs {
     type Error = LlamaTokenTypeFromIntError;

     fn try_from(value: llama_cpp_sys_2::llama_vocab_type) -> Result<Self, Self::Error> {
-        Ok(Self(BitFlags::from_bits(value).map_err(|e| {
+        Ok(Self(BitFlags::from_bits(value as _).map_err(|e| {
             LlamaTokenTypeFromIntError::UnknownValue(e.invalid_bits())
         })?))
     }
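
The one-line fix casts the raw C value with `as _`, letting the compiler infer whatever integer width `BitFlags::from_bits` expects instead of assuming the bindgen alias already matches it. A minimal sketch of that inference (function and widths illustrative):

```rust
fn from_bits(bits: u32) -> Option<u32> {
    // Stand-in for an API that takes one specific integer width.
    Some(bits)
}

fn main() {
    let raw: i32 = 7; // e.g. a C enum value surfaced by bindgen
    // `as _` picks the target width (u32 here) from the call site.
    assert_eq!(from_bits(raw as _), Some(7));
}
```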
49 changes: 29 additions & 20 deletions llama-cpp-sys-2/build.rs
@@ -91,11 +91,9 @@ fn compile_bindings(
     llama_header_path: &Path,
 ) -> Result<(), Box<dyn std::error::Error + 'static>> {
     println!("Generating bindings..");
-
-    let includes = [
-        llama_header_path.join("ggml").join("include"),
-    ];
-
+
+    let includes = [llama_header_path.join("ggml").join("include")];
+
     let bindings = bindgen::Builder::default()
         .clang_args(includes.map(|path| format!("-I{}", path.to_string_lossy())))
         .header(
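
The hunk is truncated by the viewer, but the surrounding function is the standard bindgen build-script flow: each include directory becomes a `-I` clang argument, the builder is pointed at a wrapper header, and the generated bindings are written out. A hedged sketch of that flow (the header and output paths are assumptions, not this build script verbatim):

```rust
fn generate_bindings(llama_header_path: &std::path::Path) -> Result<(), Box<dyn std::error::Error>> {
    let includes = [llama_header_path.join("ggml").join("include")];
    let bindings = bindgen::Builder::default()
        .clang_args(includes.map(|path| format!("-I{}", path.to_string_lossy())))
        .header(llama_header_path.join("include").join("llama.h").to_string_lossy())
        .generate()?; // runs libclang over the header
    bindings.write_to_file("bindings.rs")?; // path illustrative; real scripts use OUT_DIR
    Ok(())
}
```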
@@ -425,9 +423,7 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'stati
     // nvcc.flag("-Wno-pedantic");
     // }

-    for lib in [
-        "cuda", "cublas", "cudart", "cublasLt"
-    ] {
+    for lib in ["cuda", "cublas", "cudart", "cublasLt"] {
         println!("cargo:rustc-link-lib={}", lib);
     }
     if !nvcc.get_compiler().is_like_msvc() {
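
Collapsing the array changes nothing at build time: each iteration prints a `cargo:rustc-link-lib` directive, which Cargo reads from the build script's stdout and forwards to rustc as a native library to link. A minimal build.rs sketch (the `static=` variant is a hypothetical illustration):

```rust
// build.rs: every `cargo:` line printed to stdout is a directive to Cargo.
fn main() {
    for lib in ["cuda", "cublas", "cudart", "cublasLt"] {
        println!("cargo:rustc-link-lib={lib}"); // dynamic linking by default
    }
    // An explicit `KIND=` prefix selects the linkage, e.g.:
    // println!("cargo:rustc-link-lib=static=cudart_static");
}
```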
@@ -623,31 +619,44 @@ fn gen_vulkan_shaders(out_path: impl AsRef<Path>) -> (impl AsRef<Path>, impl AsR
         .cpp(true)
         .get_compiler();

-    assert!(!cxx.is_like_msvc(), "Compiling Vulkan GGML with MSVC is not supported at this time.");
+    assert!(
+        !cxx.is_like_msvc(),
+        "Compiling Vulkan GGML with MSVC is not supported at this time."
+    );

     let vulkan_shaders_gen_bin = out_path.as_ref().join("vulkan-shaders-gen");

     cxx.to_command()
         .args([
-            vulkan_shaders_src.join("vulkan-shaders-gen.cpp").as_os_str(),
-            "-o".as_ref(), vulkan_shaders_gen_bin.as_os_str()
+            vulkan_shaders_src
+                .join("vulkan-shaders-gen.cpp")
+                .as_os_str(),
+            "-o".as_ref(),
+            vulkan_shaders_gen_bin.as_os_str(),
         ])
-        .output().expect("Could not compile Vulkan shader generator");
+        .output()
+        .expect("Could not compile Vulkan shader generator");

     let header = out_path.as_ref().join("ggml-vulkan-shaders.hpp");
     let source = out_path.as_ref().join("ggml-vulkan-shaders.cpp");

     Command::new(vulkan_shaders_gen_bin)
         .args([
-            "--glslc".as_ref(), "glslc".as_ref(),
-            "--input-dir".as_ref(), vulkan_shaders_src.as_os_str(),
-            "--output-dir".as_ref(), out_path.as_ref().join("vulkan-shaders.spv").as_os_str(),
-            "--target-hpp".as_ref(), header.as_os_str(),
-            "--target-cpp".as_ref(), source.as_os_str(),
-            "--no-clean".as_ref()
+            "--glslc".as_ref(),
+            "glslc".as_ref(),
+            "--input-dir".as_ref(),
+            vulkan_shaders_src.as_os_str(),
+            "--output-dir".as_ref(),
+            out_path.as_ref().join("vulkan-shaders.spv").as_os_str(),
+            "--target-hpp".as_ref(),
+            header.as_os_str(),
+            "--target-cpp".as_ref(),
+            source.as_os_str(),
+            "--no-clean".as_ref(),
         ])
-        .output().expect("Could not run Vulkan shader generator");
-
+        .output()
+        .expect("Could not run Vulkan shader generator");

     (out_path, source)
 }
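
The reflowed chains keep the same two-step flow: build `vulkan-shaders-gen` with the host C++ compiler, then run it to emit the shader header/source pair. A reduced sketch of the `std::process::Command` pattern used for both steps (`echo` stands in for the generator so the snippet runs on any Unix-like host):

```rust
use std::process::Command;

fn main() {
    // Stand-in for invoking a just-built generator with flag/value args.
    let output = Command::new("echo")
        .args(["--input-dir", "vulkan-shaders", "--no-clean"])
        .output()
        .expect("Could not run generator");
    assert!(output.status.success());
}
```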
