Skip to content

Commit acfc547

Browse files
CUDA: tighter VRAM scratch size for 65b/70b (#2551)
1 parent 7ed8d1f commit acfc547

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

llama.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -149,31 +149,31 @@ static const std::map<e_model, size_t> & MEM_REQ_EVAL()
149149
}
150150

151151
// amount of VRAM needed per batch size to hold temporary results
152-
// the values for 3b and 65b are not derived from testing but instead chosen conservatively
152+
// the value for 3b is not derived from testing but instead chosen conservatively
153153
static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_BASE()
154154
{
155155
static std::map<e_model, size_t> k_sizes = {
156156
{ MODEL_3B, 512ull * kB },
157157
{ MODEL_7B, 512ull * kB },
158158
{ MODEL_13B, 640ull * kB },
159159
{ MODEL_30B, 768ull * kB },
160-
{ MODEL_65B, 1536ull * kB },
161-
{ MODEL_70B, 1536ull * kB }, // TODO (likely can be reduced)
160+
{ MODEL_65B, 1280ull * kB },
161+
{ MODEL_70B, 1280ull * kB },
162162
};
163163
return k_sizes;
164164
}
165165

166166
// amount of VRAM needed per batch size and context to hold temporary results
167-
// the values for 3b and 65b are not derived from testing but instead chosen conservatively
167+
// the value for 3b is not derived from testing but instead chosen conservatively
168168
static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_PER_CONTEXT()
169169
{
170170
static std::map<e_model, size_t> k_sizes = {
171171
{ MODEL_3B, 128ull },
172172
{ MODEL_7B, 128ull },
173173
{ MODEL_13B, 160ull },
174174
{ MODEL_30B, 208ull },
175-
{ MODEL_65B, 416ull },
176-
{ MODEL_70B, 416ull }, // TODO (likely can be reduced)
175+
{ MODEL_65B, 256ull },
176+
{ MODEL_70B, 256ull },
177177
};
178178
return k_sizes;
179179
}

0 commit comments

Comments
 (0)