@@ -149,31 +149,31 @@ static const std::map<e_model, size_t> & MEM_REQ_EVAL()
149
149
}
150
150
151
151
// amount of VRAM needed per batch size to hold temporary results
152
- // the values for 3b and 65b are not derived from testing but instead chosen conservatively
152
+ // the values for 3b are not derived from testing but instead chosen conservatively
153
153
static const std::map<e_model, size_t > & VRAM_REQ_SCRATCH_BASE ()
154
154
{
155
155
static std::map<e_model, size_t > k_sizes = {
156
156
{ MODEL_3B, 512ull * kB },
157
157
{ MODEL_7B, 512ull * kB },
158
158
{ MODEL_13B, 640ull * kB },
159
159
{ MODEL_30B, 768ull * kB },
160
- { MODEL_65B, 1536ull * kB },
161
- { MODEL_70B, 1536ull * kB }, // TODO (likely can be reduced)
160
+ { MODEL_65B, 1280ull * kB },
161
+ { MODEL_70B, 1280ull * kB },
162
162
};
163
163
return k_sizes;
164
164
}
165
165
166
166
// amount of VRAM needed per batch size and context to hold temporary results
167
- // the values for 3b and 65b are not derived from testing but instead chosen conservatively
167
+ // the values for 3b are not derived from testing but instead chosen conservatively
168
168
static const std::map<e_model, size_t > & VRAM_REQ_SCRATCH_PER_CONTEXT ()
169
169
{
170
170
static std::map<e_model, size_t > k_sizes = {
171
171
{ MODEL_3B, 128ull },
172
172
{ MODEL_7B, 128ull },
173
173
{ MODEL_13B, 160ull },
174
174
{ MODEL_30B, 208ull },
175
- { MODEL_65B, 416ull },
176
- { MODEL_70B, 416ull }, // TODO (likely can be reduced)
175
+ { MODEL_65B, 256ull },
176
+ { MODEL_70B, 256ull },
177
177
};
178
178
return k_sizes;
179
179
}
0 commit comments