
Commit cdc02b7

GGML_ASSERT(false) -> GGML_ABORT("fatal error")
1 parent 5fd4cef commit cdc02b7

Large commits have some content hidden by default; only part of the diff is shown below.

45 files changed: +358 -358 lines

examples/eval-callback/eval-callback.cpp

+1 -1
@@ -62,7 +62,7 @@ static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne
     } else if (type == GGML_TYPE_I8) {
         v = (float) *(int8_t *) &data[i];
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
     printf("%12.4f", v);
     sum += v;

examples/imatrix/imatrix.cpp

+2 -2
@@ -127,7 +127,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         }
         else if (e.values.size() != (size_t)src1->ne[0]*n_as) {
             fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as);
-            exit(1); //GGML_ASSERT(false);
+            exit(1); //GGML_ABORT("fatal error");
         }
         if (m_params.verbosity > 1) {
             printf("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type);
@@ -176,7 +176,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         }
         else if (e.values.size() != (size_t)src1->ne[0]) {
             fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]);
-            exit(1); //GGML_ASSERT(false);
+            exit(1); //GGML_ABORT("fatal error");
         }
         ++e.ncall;
         if (m_params.verbosity > 1) {

examples/llama-bench/llama-bench.cpp

+3 -3
@@ -150,7 +150,7 @@ static const char * output_format_str(output_formats format) {
         case JSON: return "json";
         case MARKDOWN: return "md";
         case SQL: return "sql";
-        default: GGML_ASSERT(!"invalid output format");
+        default: GGML_ABORT("invalid output format");
     }
 }

@@ -176,7 +176,7 @@ static const char * split_mode_str(llama_split_mode mode) {
         case LLAMA_SPLIT_MODE_NONE: return "none";
         case LLAMA_SPLIT_MODE_LAYER: return "layer";
         case LLAMA_SPLIT_MODE_ROW: return "row";
-        default: GGML_ASSERT(!"invalid split mode");
+        default: GGML_ABORT("invalid split mode");
     }
 }

@@ -1326,7 +1326,7 @@ static std::unique_ptr<printer> create_printer(output_formats format) {
         case SQL:
             return std::unique_ptr<printer>(new sql_printer());
     }
-    GGML_ASSERT(false);
+    GGML_ABORT("fatal error");
 }

 int main(int argc, char ** argv) {

examples/llava/clip.cpp

+1 -1
@@ -869,7 +869,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             embeddings = peg_0;
         }
         else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     }

examples/tokenize/tokenize.cpp

+1 -1
@@ -163,7 +163,7 @@ static void write_utf8_cstr_to_stdout(const char * str, bool & invalid_utf8) {
         printf(">");
         return;
     }
-    GGML_ASSERT(false && "MultiByteToWideChar() failed in an unexpected way.");
+    GGML_ABORT("MultiByteToWideChar() failed in an unexpected way.");
 }

 LPWSTR wstr = (LPWSTR) calloc(length_needed+1, sizeof(*wstr));

ggml/include/ggml.h

+3 -2
@@ -272,7 +272,8 @@
 #define GGML_NORETURN _Noreturn
 #endif

-#define GGML_ASSERT(x) if (!(x)) ggml_abort(__FILE__, __LINE__, #x)
+#define GGML_ABORT(x) ggml_abort(__FILE__, __LINE__, x)
+#define GGML_ASSERT(x) if (!(x)) GGML_ABORT(#x)

 // used to copy the number of elements and stride in bytes of tensors into local variables.
 // main purpose is to reduce code duplication and improve readability.
@@ -322,7 +323,7 @@
 extern "C" {
 #endif

-    GGML_API GGML_NORETURN void ggml_abort(const char * file, int line, const char * expr);
+    GGML_NORETURN GGML_API void ggml_abort(const char * file, int line, const char * expr);

     enum ggml_status {
         GGML_STATUS_ALLOC_FAILED = -2,
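
The ggml.h change above is the core of the commit: GGML_ABORT(x) expands to a direct call to ggml_abort with an explicit message, and GGML_ASSERT(x) is now defined in terms of it. A minimal sketch of how call sites read after the change (the function and message below are illustrative, not taken from the commit):

#include <stdint.h>

#include "ggml.h"

// Illustrative sketch only, not part of the commit.
// GGML_ASSERT keeps its old behavior (abort with the stringified condition),
// while "should never happen" branches call GGML_ABORT with a readable
// message instead of GGML_ASSERT(false) or GGML_ASSERT(!"message").
static float to_float(enum ggml_type type, const void * data) {
    GGML_ASSERT(data != NULL);
    switch (type) {
        case GGML_TYPE_F32:
            return *(const float *) data;
        case GGML_TYPE_I8:
            return (float) *(const int8_t *) data;
        default:
            GGML_ABORT("unsupported type in to_float");
    }
}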

ggml/src/ggml-alloc.c

+4 -5
@@ -91,7 +91,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso
     if (talloc->offset + size > ggml_backend_buffer_get_size(talloc->buffer)) {
         fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n",
                 __func__, tensor->name, size, ggml_backend_buffer_get_size(talloc->buffer) - talloc->offset);
-        GGML_ASSERT(!"not enough space in the buffer");
+        GGML_ABORT("not enough space in the buffer");
     }

     void * addr = (char *)ggml_backend_buffer_get_base(talloc->buffer) + talloc->offset;
@@ -132,7 +132,7 @@ static void add_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset,
             return;
         }
     }
-    GGML_ASSERT(!"out of allocated_tensors");
+    GGML_ABORT("out of allocated_tensors");
 }
 static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, const struct ggml_tensor * tensor) {
     for (int i = 0; i < 1024; i++) {
@@ -142,7 +142,7 @@ static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offs
         }
     }
     fprintf(stderr, "tried to free tensor %s not found\n", tensor->name);
-    GGML_ASSERT(!"tensor not found");
+    GGML_ABORT("tensor not found");
 }
 #endif

@@ -175,8 +175,7 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t siz
         // this should never happen
         fprintf(stderr, "%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n",
                 __func__, size, max_avail);
-        GGML_ASSERT(!"not enough space in the buffer");
-        GGML_UNREACHABLE();
+        GGML_ABORT("not enough space in the buffer");
     }
 }
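
The last hunk in ggml-alloc.c also drops the separate GGML_UNREACHABLE() hint: GGML_ABORT is a direct call to ggml_abort, which the header now declares GGML_NORETURN, so the compiler already knows control cannot continue past it. A rough sketch of the pattern (function name and sizes are made up for the example):

#include <stddef.h>

#include "ggml.h"

// Illustrative sketch only, not part of the commit. In ggml-alloc.c this
// pattern previously paired GGML_ASSERT(!"...") with GGML_UNREACHABLE();
// a single GGML_ABORT now suffices because ggml_abort is declared
// GGML_NORETURN, so no return value is needed on this path.
static size_t pick_chunk_size(size_t requested) {
    if (requested <= 4096) {
        return 4096;
    }
    if (requested <= 65536) {
        return 65536;
    }
    GGML_ABORT("requested chunk size too large");
}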

ggml/src/ggml-backend.c

+1 -1
@@ -1280,7 +1280,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
     sched->ctx = ggml_init(params);
     if (sched->ctx == NULL) {
         fprintf(stderr, "%s: failed to initialize context\n", __func__);
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }

     // pass 1: assign backends to ops with pre-allocated inputs

ggml/src/ggml-blas.cpp

+1 -1
@@ -276,7 +276,7 @@ GGML_CALL static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t

         default:
             fprintf(stderr, "%s: unsupported op %s\n", __func__, ggml_op_desc(node));
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     }

ggml/src/ggml-cann.cpp

+3 -3
@@ -120,7 +120,7 @@ static void ggml_cann_log(enum ggml_log_level level, const char* format, ...) {
             file, line);
     GGML_CANN_LOG_ERROR(" %s\n", stmt);
     // abort with GGML_ASSERT to get a stack trace
-    GGML_ASSERT(!"CANN error");
+    GGML_ABORT("CANN error");
 }

 /**
@@ -342,7 +342,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
         // memory should always buffered. these memory may still needed by
         // tasks in stream.
         // TODO, fix me.
-        GGML_ASSERT(!"Cann buffer pool full, increase MAX_CANN_BUFFERS\n");
+        GGML_ABORT("Cann buffer pool full, increase MAX_CANN_BUFFERS\n");
     }
 };

@@ -1874,7 +1874,7 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend,
         ACL_CHECK(aclrtStreamWaitEvent(cann_ctx->stream(),
                                        (aclrtEvent)event->context));
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

ggml/src/ggml-cann/aclnn_ops.cpp

+13 -13
@@ -844,7 +844,7 @@ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
             ggml_cann_max_pool2d(ctx, dst);
             break;
         case GGML_OP_POOL_COUNT:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }
 }
@@ -931,9 +931,9 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                           ((ggml_tensor*)dst->extra)->nb);
                 return;
             }
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
     if (dst->type == GGML_TYPE_F32) {
         if (ggml_are_same_shape(src, dst)) {
@@ -955,12 +955,12 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                           ((ggml_tensor*)dst->extra)->nb);
                 return;
             }
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
     // TODO
-    GGML_ASSERT(false);
+    GGML_ABORT("fatal error");
 } else if (src->type == GGML_TYPE_F32) {
     // TODO: if (src0->type == dst->type && ne00 == ne0 && nb00 == type_size
     //          && nb0 == type_size)
@@ -991,10 +991,10 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                           ((ggml_tensor*)dst->extra)->nb);
                 return;
             }
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         } else {
             // TODO: dst not contiguous
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     }
     if (dst->type == GGML_TYPE_F16) {
@@ -1017,19 +1017,19 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                           ((ggml_tensor*)dst->extra)->nb);
                 return;
             }
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     }
     // TODO
-    GGML_ASSERT(false);
+    GGML_ABORT("fatal error");
 } else {
     if (ggml_are_same_shape(src, dst)) {
         cann_copy(ctx, acl_src, acl_dst);
         ACL_CHECK(aclDestroyTensor(acl_src));
         ACL_CHECK(aclDestroyTensor(acl_dst));
         return;
     }
-    GGML_ASSERT(false);
+    GGML_ABORT("fatal error");
 }
 }

@@ -2219,7 +2219,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 ((ggml_tensor*)dst->extra)->nb);
             break;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }
 }
@@ -2492,7 +2492,7 @@ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
             ggml_cann_mul_mat_q8_0(ctx, dst);
             break;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }
 }

ggml/src/ggml-cuda.cu

+3 -3
@@ -98,7 +98,7 @@ void ggml_cuda_error(const char * stmt, const char * func, const char * file, in
     GGML_CUDA_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func, file, line);
     GGML_CUDA_LOG_ERROR(" %s\n", stmt);
     // abort with GGML_ASSERT to get a stack trace
-    GGML_ASSERT(!"CUDA error");
+    GGML_ABORT("CUDA error");
 }

 // this is faster on Windows
@@ -1596,7 +1596,7 @@ static void ggml_cuda_op_mul_mat(
                 CUDA_CHECK(ggml_cuda_cpy_tensor_2d(
                             src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream));
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }

             if (quantize_src1 && !src1_is_contiguous) {
@@ -2945,7 +2945,7 @@ static void ggml_backend_cuda_event_wait(ggml_backend_t backend, ggml_backend_ev

         CUDA_CHECK(cudaLaunchHostFunc(cuda_ctx->stream(), wait_fn, event));
 #endif
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

ggml/src/ggml-cuda/argsort.cu

+1 -1
@@ -81,7 +81,7 @@ static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, co
     } else if (order == GGML_SORT_ORDER_DESC) {
         k_argsort_f32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

ggml/src/ggml-cuda/binbcast.cu

+1 -1
@@ -259,7 +259,7 @@ static void ggml_cuda_op_bin_bcast(
     } else {
         fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__,
             ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type));
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

ggml/src/ggml-cuda/common.cuh

+1 -1
@@ -348,7 +348,7 @@ static __device__ void no_device_code(
 #ifdef __CUDA_ARCH__
 #define NO_DEVICE_CODE no_device_code(__FILE__, __LINE__, __FUNCTION__, __CUDA_ARCH__, STRINGIZE(__CUDA_ARCH_LIST__))
 #else
-#define NO_DEVICE_CODE //GGML_ASSERT(false && "NO_DEVICE_CODE not valid in host code.")
+#define NO_DEVICE_CODE //GGML_ABORT("NO_DEVICE_CODE not valid in host code.")
 #endif // __CUDA_ARCH__

 static __device__ __forceinline__ float warp_reduce_sum(float x) {

ggml/src/ggml-cuda/cpy.cu

+2 -2
@@ -451,7 +451,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
     } else {
         fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
             ggml_type_name(src0->type), ggml_type_name(src1->type));
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

@@ -484,6 +484,6 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) {
     } else {
         fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
             ggml_type_name(src0->type), ggml_type_name(src1->type));
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

ggml/src/ggml-cuda/dmmv.cu

+1 -1
@@ -662,7 +662,7 @@ void ggml_cuda_op_dequantize_mul_mat_vec(
             convert_mul_mat_vec_f16_cuda(src0_dd_i, src1_dfloat, dst_dd_i, ne00, row_diff, stream);
             break;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }

ggml/src/ggml-cuda/fattn-common.cuh

+3 -3
@@ -564,19 +564,19 @@ static void on_no_fattn_vec_case(const int D) {
         fprintf(stderr, "Unsupported KV type combination for head_size 64.\n");
         fprintf(stderr, "By default only f16 KV cache is supported.\n");
         fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for V cache quantization support.\n");
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     } else if (D == 128) {
         fprintf(stderr, "Unsupported KV type combination for head_size 128.\n");
         fprintf(stderr, "Supported combinations:\n");
         fprintf(stderr, "  - K == q4_0, V == q4_0,  4.50 BPV\n");
         fprintf(stderr, "  - K == q8_0, V == q8_0,  8.50 BPV\n");
         fprintf(stderr, "  - K == f16,  V == f16,  16.00 BPV\n");
         fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n");
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     } else {
         fprintf(stderr, "Unsupported KV type combination for head_size 256.\n");
         fprintf(stderr, "Only f16 is supported.\n");
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

ggml/src/ggml-cuda/fattn-tile-f16.cu

+1 -1
@@ -287,7 +287,7 @@ void launch_fattn_tile_f16_64_128(ggml_backend_cuda_context & ctx, ggml_tensor *
             launch_fattn<D, parallel_blocks>(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true);
         } break;
         default: {
-            GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128.");
+            GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128.");
         } break;
     }
 }

ggml/src/ggml-cuda/fattn-tile-f32.cu

+1 -1
@@ -284,7 +284,7 @@ void launch_fattn_tile_f32_64_128(ggml_backend_cuda_context & ctx, ggml_tensor *
             launch_fattn<D, parallel_blocks>(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true);
         } break;
         default: {
-            GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128.");
+            GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128.");
        } break;
     }
 }
