Skip to content

Commit

Permalink
Fix for older Phi-3 models without sliding-window attention (SWA) metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
LostRuins committed Jul 28, 2024
1 parent 01afb28 commit 0029e36
Showing 1 changed file with 17 additions and 2 deletions.
19 changes: 17 additions & 2 deletions src/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ static void zeros(std::ofstream & file, size_t n) {
}
}

// One-shot latch for the "PHI3 model did not contain sliding window" warning:
// set to true the first time the warning is printed so it is not repeated on
// later graph builds.
static bool phi3swa_warning_showed = false; //to warn when old phi3 model has no SWA
// Fallback switch for CLBlast offloading.
static bool clblast_offload_fallback_mode = false; //used when regular offload will segfault
// NOTE(review): presumably the number of layers to offload while the fallback
// mode above is active -- its use is not visible here, confirm against callers.
static int clblast_offload_fallback_layers = 0;
static int layer_name_to_number(std::string inputString)
Expand Down Expand Up @@ -4911,7 +4912,7 @@ static void llm_load_hparams(
} break;
case LLM_ARCH_PHI3:
{
ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa,false);
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

switch (hparams.n_layer) {
Expand Down Expand Up @@ -10807,7 +10808,21 @@ struct llm_build_context {
struct ggml_tensor * inp_pos = build_inp_pos();

// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
struct ggml_tensor * KQ_mask_swa = build_inp_KQ_mask_swa();
struct ggml_tensor * KQ_mask_swa;
if(hparams.n_swa==0)
{
if(!phi3swa_warning_showed)
{
phi3swa_warning_showed = true;
printf("\nWarning: PHI3 model did not contain sliding window!!!\nSWA is disabled. Model may need a new quant.\n");
}
KQ_mask_swa = build_inp_KQ_mask();
}
else
{
KQ_mask_swa = build_inp_KQ_mask_swa();
}


for (int il = 0; il < n_layer; ++il) {
auto residual = inpL;
Expand Down

0 comments on commit 0029e36

Please sign in to comment.