@@ -1258,7 +1258,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
 //
 
 struct ggml_object {
-    size_t offset;
+    size_t offs;
     size_t size;
 
     struct ggml_object * next;
@@ -1284,6 +1284,9 @@ struct ggml_context {
 
     struct ggml_object * objects_begin;
     struct ggml_object * objects_end;
+
+    struct ggml_scratch scratch;
+    struct ggml_scratch scratch_save;
 };
 
 struct ggml_context_container {
@@ -1346,7 +1349,7 @@ inline static void ggml_critical_section_end(void) {
 
 void ggml_print_object(const struct ggml_object * obj) {
     GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
-            obj->offset, obj->size, (const void *) obj->next);
+            obj->offs, obj->size, (const void *) obj->next);
 }
 
 void ggml_print_objects(const struct ggml_context * ctx) {
@@ -1542,12 +1545,14 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
     }
 
     *ctx = (struct ggml_context) {
-        .mem_size         = params.mem_size,
-        .mem_buffer       = params.mem_buffer ? params.mem_buffer : malloc(params.mem_size),
-        .mem_buffer_owned = params.mem_buffer ? false : true,
-        .n_objects        = 0,
-        .objects_begin    = NULL,
-        .objects_end      = NULL,
+        /*.mem_size         =*/ params.mem_size,
+        /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size),
+        /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
+        /*.n_objects        =*/ 0,
+        /*.objects_begin    =*/ NULL,
+        /*.objects_end      =*/ NULL,
+        /*.scratch          =*/ { 0, 0, NULL, },
+        /*.scratch_save     =*/ { 0, 0, NULL, },
     };
 
     ggml_assert_aligned(ctx->mem_buffer);
@@ -1570,7 +1575,7 @@ void ggml_free(struct ggml_context * ctx) {
             g_state.contexts[i].used = false;
 
             GGML_PRINT_DEBUG("%s: context %d with %d objects has been freed. memory used = %zu\n",
-                    __func__, i, ctx->n_objects, ctx->objects_end->offset + ctx->objects_end->size);
+                    __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
 
             if (ctx->mem_buffer_owned) {
                 free(ctx->mem_buffer);
@@ -1589,7 +1594,15 @@ void ggml_free(struct ggml_context * ctx) {
 }
 
 size_t ggml_used_mem(const struct ggml_context * ctx) {
-    return ctx->objects_end->offset + ctx->objects_end->size;
+    return ctx->objects_end->offs + ctx->objects_end->size;
+}
+
+size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) {
+    const size_t result = ctx->scratch.data ? ctx->scratch.offs : 0;
+
+    ctx->scratch = scratch;
+
+    return result;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1603,9 +1616,9 @@ struct ggml_tensor * ggml_new_tensor_impl(
     // always insert objects at the end of the context's memory pool
     struct ggml_object * obj_cur = ctx->objects_end;
 
-    const size_t cur_offset = obj_cur == NULL ? 0 : obj_cur->offset;
-    const size_t cur_size   = obj_cur == NULL ? 0 : obj_cur->size;
-    const size_t cur_end    = cur_offset + cur_size;
+    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
+    const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
+    const size_t cur_end  = cur_offs + cur_size;
 
     size_t size_needed = 0;
 
@@ -1616,25 +1629,52 @@ struct ggml_tensor * ggml_new_tensor_impl(
         }
         // align to GGML_MEM_ALIGN
         size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
-
-    }
-    size_needed += sizeof(struct ggml_tensor);
-
-    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
-        GGML_PRINT("%s: not enough space in the context's memory pool\n", __func__);
-        assert(false);
-        return NULL;
     }
 
     char * const mem_buffer = ctx->mem_buffer;
-
     struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
 
-    *obj_new = (struct ggml_object) {
-        .offset = cur_end + GGML_OBJECT_SIZE,
-        .size   = size_needed,
-        .next   = NULL,
-    };
+    if (ctx->scratch.data == NULL || data != NULL) {
+        size_needed += sizeof(struct ggml_tensor);
+
+        if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
+            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                    __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
+            assert(false);
+            return NULL;
+        }
+
+        *obj_new = (struct ggml_object) {
+            .offs = cur_end + GGML_OBJECT_SIZE,
+            .size = size_needed,
+            .next = NULL,
+        };
+    } else {
+        if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
+            GGML_PRINT("%s: not enough space in the scratch memory\n", __func__);
+            assert(false);
+            return NULL;
+        }
+
+        if (cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE > ctx->mem_size) {
+            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                    __func__, cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE, ctx->mem_size);
+            assert(false);
+            return NULL;
+        }
+
+        data = (char * const) ctx->scratch.data + ctx->scratch.offs;
+
+        *obj_new = (struct ggml_object) {
+            .offs = cur_end + GGML_OBJECT_SIZE,
+            .size = sizeof(struct ggml_tensor),
+            .next = NULL,
+        };
+
+        //printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
+
+        ctx->scratch.offs += size_needed;
+    }
 
     if (obj_cur != NULL) {
         obj_cur->next = obj_new;
@@ -1645,9 +1685,9 @@ struct ggml_tensor * ggml_new_tensor_impl(
 
     ctx->objects_end = obj_new;
 
-    //GGML_PRINT_DEBUG("%s: inserted new object at %zu\n", __func__, cur_end);
+    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
 
-    struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offset);
+    struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offs);
 
     ggml_assert_aligned(result);
 
@@ -1690,7 +1730,7 @@ struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum   ggml_type type,
         int    n_dims,
-        const int* ne) {
+        const int * ne) {
     return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
 }
 
@@ -1732,16 +1772,26 @@ struct ggml_tensor * ggml_new_tensor_4d(
 }
 
 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
+    ctx->scratch_save = ctx->scratch;
+    ctx->scratch.data = NULL;
+
     struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
 
+    ctx->scratch = ctx->scratch_save;
+
     ggml_set_i32(result, value);
 
     return result;
 }
 
 struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
+    ctx->scratch_save = ctx->scratch;
+    ctx->scratch.data = NULL;
+
     struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
 
+    ctx->scratch = ctx->scratch_save;
+
     ggml_set_f32(result, value);
 
     return result;
@@ -2350,7 +2400,7 @@ struct ggml_tensor * ggml_repeat(
     result->op   = GGML_OP_REPEAT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src0 = a;
-    result->src1 = NULL;
+    result->src1 = b;
 
     return result;
 }
@@ -2966,9 +3016,7 @@ struct ggml_tensor * ggml_diag_mask_inf(
     // TODO: when implement backward, fix this:
     //struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
     struct ggml_tensor * result = ggml_view_tensor(ctx, a);
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
-    ((int32_t *) b->data)[0] = n_past;
+    struct ggml_tensor * b = ggml_new_i32(ctx, n_past);
 
     result->op   = GGML_OP_DIAG_MASK_INF;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -4300,7 +4348,9 @@ static bool ggml_compute_forward_mul_mat_use_blas(
     const int ne1 = dst->ne[1];
 
     // TODO: find the optimal values for these
-    if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && ne0 >= 32 && ne1 >= 32 && ne10 >= 32) {
+    if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && (
+        (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)
+        )) {
         //printf("BLAS: %d %d %d\n", ne0, ne1, ne10);
         return true;
     }
@@ -7289,6 +7339,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                        node->n_tasks = 1; // TODO: this actually is doing nothing
                                           //       the threads are still spinning
                        cur = sizeof(float)*(node->src0->ne[0]*node->src0->ne[1]);
+                        //printf("src0: ne0 = %d, ne1 = %d, ne = %d\n", node->src0->ne[0], node->src0->ne[1], node->src0->ne[0]*node->src0->ne[1]);
+                        //printf("src1: ne0 = %d, ne1 = %d, ne = %d\n", node->src1->ne[0], node->src1->ne[1], node->src1->ne[0]*node->src1->ne[1]);
+                        //printf("cur = %zu\n", cur);
                    } else {
                        cur = sizeof(ggml_fp16_t)*ggml_nelements(node->src1);
                    }
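
Usage note: a minimal sketch of how the scratch API introduced in this diff is meant to be driven from caller code. This is not part of the commit; the buffer sizes, tensor shape, and the use of a static array below are illustrative assumptions.

    #include "ggml.h"

    int main(void) {
        // Pool for ggml_object/ggml_tensor headers (and any non-scratch tensor data).
        // 16 MB is an arbitrary size chosen for this sketch.
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL, // NULL => ggml_init mallocs the buffer
        };
        struct ggml_context * ctx = ggml_init(params);

        // Separate buffer for intermediate tensor data; 8 MB is arbitrary.
        static char scratch_buf[8*1024*1024];
        ggml_set_scratch(ctx, (struct ggml_scratch) {
            /*.offs =*/ 0,
            /*.size =*/ sizeof(scratch_buf),
            /*.data =*/ scratch_buf,
        });

        // While a scratch is set, new tensor data lands in scratch_buf;
        // only the headers consume the context pool (see ggml_new_tensor_impl above).
        struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 64);
        (void) t; // unused in this sketch

        // Disable the scratch again; later tensors allocate from the pool.
        // The return value is the previous scratch offset (0 if none was set).
        ggml_set_scratch(ctx, (struct ggml_scratch) { 0, 0, NULL, });

        ggml_free(ctx);
        return 0;
    }

Note how ggml_new_i32/ggml_new_f32 in the diff save and restore ctx->scratch around their allocations: their 4-byte payloads must outlive the scratch region, so they always go into the context pool.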