Skip to content

Commit 2433863

Browse files
author
Chris Warren-Smith
committed
LLAMA: nitro added check for memory shift
1 parent 02c66f2 commit 2433863

2 files changed

Lines changed: 10 additions & 2 deletions

File tree

llama/llama-sb.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ Llama::Llama() :
6464
_n_system_tokens(0),
6565
_is_gemma4(false),
6666
_sampler_dirty(false),
67+
_can_shift(false),
6768
_seed(LLAMA_DEFAULT_SEED) {
6869
llama_log_set([](enum ggml_log_level level, const char *text, void *user_data) {
6970
Llama *llama = (Llama *)user_data;
@@ -103,6 +104,7 @@ Llama::Llama(Llama &&other) noexcept
103104
, _n_system_tokens(other._n_system_tokens)
104105
, _is_gemma4(other._is_gemma4)
105106
, _sampler_dirty(other._sampler_dirty)
107+
, _can_shift(other._can_shift)
106108
, _seed(other._seed) {
107109
}
108110

@@ -179,9 +181,10 @@ bool Llama::load_model(string model_path, int n_ctx, int n_batch, int n_gpu_laye
179181
set_last_error("Create context");
180182
} else {
181183
_vocab = llama_model_get_vocab(_model);
184+
_template = llama_model_chat_template(_model, nullptr);
185+
_is_gemma4 = (_template.find("<|turn>model") != string::npos);
186+
_can_shift = llama_memory_can_shift(llama_get_memory(_ctx));
182187
}
183-
_template = llama_model_chat_template(_model, nullptr);
184-
_is_gemma4 = (_template.find("<|turn>model") != string::npos);
185188
}
186189

187190
return _last_error.empty();
@@ -579,6 +582,10 @@ bool Llama::make_space_for_tokens(int n_tokens) {
579582
_last_error = "Can't make enough space while keeping num_system_tokens tokens";
580583
return false;
581584
}
585+
if (!_can_shift) {
586+
_last_error = "Memory type doesn't support shifting, can't evict mid-sequence";
587+
return false;
588+
}
582589

583590
llama_pos remove_start = pos_min + _n_system_tokens;
584591

llama/llama-sb.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -148,5 +148,6 @@ struct Llama {
148148
int _n_system_tokens;
149149
bool _is_gemma4;
150150
bool _sampler_dirty;
151+
bool _can_shift;
151152
unsigned int _seed;
152153
};

0 commit comments

Comments
 (0)