#include <cassert>
#include <cerrno>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <map>
#include <new>
2324#include < unistd.h>
2425#include < sys/mman.h>
2526#include < sys/stat.h>
26- #else
27- #include < errno.h>
28- #define msync (addr, len_bytes, flag ) winMSync
29- #define MS_ASYNC 0
3027#endif
3128
3229#define ROUNDUP (X, K ) (((X) + (K)-1 ) & -(K))
3330#define IS2POW (X ) (!((X) & ((X)-1 )))
3431
3532#define MAGIC_PATH " magic.dat"
3633#define MAGIC_ADDR (char *)0x330000000000
37- #define MAGIC_GRAN 2097152
34+ #define MAGIC_GRAN 65536
3835#define MAGIC_ALGN (sizeof (size_t ) * 2 )
3936
4037#define ANSI_COLOR_RED " \x1b [31m"
@@ -104,49 +101,21 @@ struct llama_model {
104101 std::map<std::string, struct ggml_tensor *> tensors;
105102};
106103
107-
108104struct magic {
109105 uint32_t magic;
110106 std::atomic<unsigned > lock;
111107 int fd;
112- size_t commit;
113- size_t offset;
114- size_t capacity;
115- gpt_vocab* vocab;
116- llama_model* model;
108+ uint64_t commit;
109+ uint64_t offset;
110+ uint64_t capacity;
111+ gpt_vocab * vocab;
112+ llama_model * model;
117113};
118114
119- static void winMSync (magic* addr, size_t len_bytes) {
120- bool success = FlushViewOfFile ((void *)addr, len_bytes);
121- if (!success) {
122- LPVOID lpMsgBuf;
123- LPVOID lpDisplayBuf;
124- DWORD error_code = GetLastError ();
125- FormatMessage (
126- FORMAT_MESSAGE_ALLOCATE_BUFFER |
127- FORMAT_MESSAGE_FROM_SYSTEM |
128- FORMAT_MESSAGE_IGNORE_INSERTS,
129- NULL ,
130- error_code,
131- MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
132- (LPTSTR)&lpMsgBuf,
133- 0 , NULL );
134- lpDisplayBuf = (LPVOID)LocalAlloc (LMEM_ZEROINIT,
135- (lstrlen ((LPCTSTR)lpMsgBuf) + 40 ) * sizeof (TCHAR));
136- StringCchPrintf ((LPTSTR)lpDisplayBuf,
137- LocalSize (lpDisplayBuf) / sizeof (TCHAR),
138- TEXT (" failed with error %d: %s" ),
139- error_code, lpMsgBuf);
140- }
141- HANDLE hFile = (HANDLE)_get_osfhandle (addr->fd );
142- FlushFileBuffers (hFile);
143- }
144-
145-
146115static struct magic *mag;
147116
// Acquires `lock` by busy-waiting.
//
// exchange(1) returns the previous value: 0 means the lock was free and is now
// ours, non-zero means someone else holds it and we must retry. (The inverted
// test `!lock.exchange(1)` would exit only when the lock was already held.)
static inline void spin_lock(std::atomic<unsigned> &lock) {
    while (lock.exchange(1, std::memory_order_acquire)) {
        // spin until the previous value was 0
    }
}
151120
152121static inline void spin_unlock (std::atomic<unsigned > &lock) {
@@ -162,62 +131,64 @@ static void *Mmap(void *addr, size_t length, int prot, int flags, int fd, off_t
162131}
163132
164133static void magic_commit (void ) {
165- mag->offset = mag->capacity ;
166- mag->commit = mag->capacity ;
134+ mag->commit = ROUNDUP (mag->offset , MAGIC_GRAN);
167135 mag->magic = 0xFEEDABEE ;
168- bool success = msync (mag, mag->commit , MS_ASYNC);
136+ if (msync (mag, mag->commit , MS_ASYNC) == -1 ) {
137+ perror (" msync" );
138+ exit (77 );
139+ }
169140}
170141
171142static void magic_init (void ) {
172143 int fd;
173144 size_t n;
174- #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
175- struct stat st;
176- #else
177- struct _stat64 st;
178- #endif
145+ int64_t size;
179146 if (mag) return ;
180147 n = ROUNDUP (sizeof (struct magic ), MAGIC_GRAN);
181148 if ((fd = open (MAGIC_PATH, O_RDWR)) != -1 ) {
182- int result = fstat (fd, &st);
183- int error = errno;
184- if (errno == EBADF)
185- fprintf (stderr, " Bad file descriptor.\n " );
186- else if (errno == EINVAL)
187- fprintf (stderr, " Invalid argument to _fstat.\n " );
188- if (st.st_size >= n) {
149+ if ((size = lseek (fd, 0 , SEEK_END)) == -1 ) {
150+ perror (" lseek" );
151+ exit (77 );
152+ }
153+ if (size >= n) {
189154 mag = (struct magic *)Mmap (MAGIC_ADDR, n,
190155 PROT_READ | PROT_WRITE,
191156 MAP_PRIVATE | MAP_FIXED, fd, 0 );
192157 if (mag->magic == 0xFEEDABEE ) {
193- mag = (struct magic *)Mmap (MAGIC_ADDR, mag->capacity ,
158+ mag = (struct magic *)Mmap (MAGIC_ADDR, mag->commit ,
194159 PROT_READ | PROT_WRITE,
195160 MAP_PRIVATE | MAP_FIXED, fd, 0 );
196161 madvise (MAGIC_ADDR, mag->capacity , MADV_WILLNEED);
197- ftruncate (fd, mag->commit );
198162 mag->offset = mag->commit ;
199163 mag->capacity = mag->commit ;
200164 mag->fd = -1 ;
201165 return ;
202166 }
203167 }
204- ftruncate (fd, 0 );
168+ if (ftruncate (fd, 0 ) == -1 ) {
169+ perror (" ftruncate" );
170+ exit (77 );
171+ }
205172 } else if ((fd = open (MAGIC_PATH, O_RDWR | O_CREAT | O_TRUNC, 0644 )) == -1 ) {
206173 perror (MAGIC_PATH);
207174 exit (77 );
208175 }
209- ftruncate (fd, n);
176+ if (ftruncate (fd, n) == -1 ) {
177+ perror (" ftruncate" );
178+ exit (77 );
179+ }
210180 mag = (struct magic *)Mmap (MAGIC_ADDR, n,
211181 PROT_READ | PROT_WRITE,
212182 MAP_SHARED | MAP_FIXED, fd, 0 );
213- mag->offset = MAGIC_GRAN;
183+ mag->offset = n;
184+ mag->capacity = n;
214185 mag->fd = fd;
215186}
216187
217- void *memalign (size_t a, size_t n) {
188+ void *magic_memalign (size_t a, size_t n) {
218189 void *p;
219- size_t i, j, k, m;
220190 static int count;
191+ size_t i, j, k, m, c2;
221192 magic_init ();
222193 if (a < MAGIC_ALGN) a = MAGIC_ALGN;
223194 while (!IS2POW (a)) ++a;
@@ -227,85 +198,82 @@ void *memalign(size_t a, size_t n) {
227198 i = i + sizeof (size_t );
228199 i = ROUNDUP (i, a);
229200 j = ROUNDUP (i + m, MAGIC_GRAN);
230- // if (j > mag->capacity) {
201+ if (j > mag->capacity ) {
202+ c2 = mag->capacity ;
203+ if (!c2) {
204+ c2 = MAGIC_GRAN;
205+ }
206+ while (j > c2) {
207+ c2 += c2 >> 4 ;
208+ c2 = ROUNDUP (c2, MAGIC_GRAN);
209+ }
231210 if (!mag->magic ) {
232- int result = ftruncate (mag->fd , j);
211+ if (ftruncate (mag->fd , c2) == -1 ) {
212+ perror (" ftruncate" );
213+ spin_unlock (mag->lock );
214+ return 0 ;
215+ }
233216 p = mmap (MAGIC_ADDR + mag->capacity ,
234- j - mag->capacity , PROT_READ | PROT_WRITE,
217+ c2 - mag->capacity , PROT_READ | PROT_WRITE,
235218 MAP_SHARED | MAP_FIXED, mag->fd , mag->capacity );
236219 } else {
237220 p = mmap (MAGIC_ADDR + mag->capacity ,
238- j - mag->capacity , PROT_READ | PROT_WRITE,
239- MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0 );
221+ c2 - mag->capacity , PROT_READ | PROT_WRITE,
222+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0 );
240223 }
241224 if (p != MAP_FAILED) {
242- mag->capacity = j ;
225+ mag->capacity = c2 ;
243226 } else {
227+ perror (" mmap" );
244228 spin_unlock (mag->lock );
245229 return 0 ;
246230 }
247- // }
231+ }
248232 mag->offset = i + m;
249233 spin_unlock (mag->lock );
250234 p = MAGIC_ADDR + i;
251235 ((size_t *)p)[-1 ] = n;
252236 return p;
253237}
254238
255- void *_malloc (size_t n) {
256- return memalign (MAGIC_ALGN, n);
257- }
258-
259- size_t malloc_usable_size (const void *p) {
260- return ((const size_t *)p)[-1 ];
239+ void *magic_malloc (size_t n) {
240+ return magic_memalign (MAGIC_ALGN, n);
261241}
262242
263- void *_calloc (size_t n, size_t z) {
243+ void *magic_calloc (size_t n, size_t z) {
264244 void *p;
265- if ((p = _malloc ((n *= z)))) {
245+ if ((p = magic_malloc ((n *= z)))) {
266246 memset (p, 0 , n);
267247 }
268248 return p;
269249}
270250
// Releases nothing: this allocator never reuses or returns memory, so
// freeing is deliberately a no-op. Accepts any pointer, including null.
void magic_free(void *p) {
    (void)p;  // intentionally unused
}
274254
275- void *_realloc (void *p, size_t n) {
255+ void *magic_realloc (void *p, size_t n) {
276256 void *q;
277257 if (!p) {
278- return _malloc (n);
258+ return magic_malloc (n);
279259 }
280260 if (!n) {
281- _free (p);
261+ magic_free (p);
282262 return 0 ;
283263 }
284- if ((q = _malloc (n))) {
264+ if ((q = magic_malloc (n))) {
285265 memcpy (q, p, ((const size_t *)p)[-1 ]);
286266 }
287267 return q;
288268}
289269
290- #if defined(malloc)
291- # undef malloc
292- #endif
293- #define malloc (x ) _malloc(x)
294-
295- #if defined(calloc)
296- # undef calloc
297- #endif
298- #define calloc (x ) _calloc(x)
299-
300- #if defined(realloc)
301- # undef realloc
302- #endif
303- #define realloc (x ) _realloc(x)
270+ void * operator new (size_t size) {
271+ return magic_malloc (size);
272+ }
304273
305- #if defined(free)
306- # undef free
307- #endif
308- #define free (x ) _free(x)
274+ void operator delete (void * p) {
275+ magic_free (p);
276+ }
309277
310278// load the model's weights from a file
311279bool llama_model_load (const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
@@ -451,7 +419,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
451419 {
452420 struct ggml_init_params params = {
453421 /* .mem_size =*/ ctx_size,
454- /* .mem_buffer =*/ NULL ,
422+ /* .mem_buffer =*/ magic_malloc (ctx_size) ,
455423 };
456424
457425 model.ctx = ggml_init (params);
@@ -772,15 +740,15 @@ bool llama_eval(
772740 const int d_key = n_embd/n_head;
773741
774742 static size_t buf_size = 512u *1024 *1024 ;
775- static void * buf = _malloc (buf_size);
743+ static void * buf = malloc (buf_size);
776744
777745 if (mem_per_token > 0 && mem_per_token*N > buf_size) {
778746 const size_t buf_size_new = 1.1 *(mem_per_token*N); // add 10% to account for ggml object overhead
779747 // fprintf(stderr, "\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
780748
781749 // reallocate
782750 buf_size = buf_size_new;
783- buf = _realloc (buf, buf_size);
751+ buf = realloc (buf, buf_size);
784752 if (buf == nullptr ) {
785753 fprintf (stderr, " %s: failed to allocate %zu bytes\n " , __func__, buf_size);
786754 return false ;
0 commit comments