From a3d0aa73d15fe2e076870cc7ba19cec7bbbab8fd Mon Sep 17 00:00:00 2001
From: Finn Voorhees
Date: Wed, 3 Jan 2024 08:39:43 -0500
Subject: [PATCH] ggml : add error handling to graph_compute (#1714)

---
 bindings/ruby/ext/ggml-backend-impl.h |  2 +-
 bindings/ruby/ext/ggml-backend.c      |  4 ++--
 bindings/ruby/ext/ggml-backend.h      |  2 +-
 ggml-backend-impl.h                   |  2 +-
 ggml-backend.c                        | 10 +++++++---
 ggml-backend.h                        |  2 +-
 ggml-cuda.cu                          |  4 +++-
 ggml-metal.h                          |  2 +-
 ggml-metal.m                          |  9 +++++----
 whisper.cpp                           | 24 ++++++++++++++++--------
 10 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/bindings/ruby/ext/ggml-backend-impl.h b/bindings/ruby/ext/ggml-backend-impl.h
index 211e3d4..31788cd 100644
--- a/bindings/ruby/ext/ggml-backend-impl.h
+++ b/bindings/ruby/ext/ggml-backend-impl.h
@@ -70,7 +70,7 @@ extern "C" {
         void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
         // compute graph without a plan
-        void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
         // check if the backend supports an operation
         bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
diff --git a/bindings/ruby/ext/ggml-backend.c b/bindings/ruby/ext/ggml-backend.c
index f6e5fce..128e33c 100644
--- a/bindings/ruby/ext/ggml-backend.c
+++ b/bindings/ruby/ext/ggml-backend.c
@@ -156,8 +156,8 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_
     backend->iface.graph_plan_compute(backend, plan);
 }
 
-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    return backend->iface.graph_compute(backend, cgraph);
 }
 
 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
diff --git a/bindings/ruby/ext/ggml-backend.h b/bindings/ruby/ext/ggml-backend.h
index 9666873..793a0a9 100644
--- a/bindings/ruby/ext/ggml-backend.h
+++ b/bindings/ruby/ext/ggml-backend.h
@@ -52,7 +52,7 @@ extern "C" {
 
     GGML_API void ggml_backend_graph_plan_free   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
     GGML_API bool ggml_backend_supports_op       (ggml_backend_t backend, const struct ggml_tensor * op);
 
     // tensor copy between different backends
diff --git a/ggml-backend-impl.h b/ggml-backend-impl.h
index 0585993..ca21b47 100644
--- a/ggml-backend-impl.h
+++ b/ggml-backend-impl.h
@@ -90,7 +90,7 @@ extern "C" {
         void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
         // compute graph without a plan
-        void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
         // check if the backend supports an operation
         bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
diff --git a/ggml-backend.c b/ggml-backend.c
index 2c37520..53e741c 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -195,11 +195,14 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_
     ggml_backend_synchronize(backend);
 }
 
-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    if (!backend->iface.graph_compute(backend, cgraph)) {
+        return false;
+    }
 
     // TODO: optional sync
     ggml_backend_synchronize(backend);
+    return true;
 }
 
 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
@@ -597,7 +600,7 @@ static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_bac
     GGML_UNUSED(backend);
 }
 
-static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
 
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
@@ -611,6 +614,7 @@ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c
     cplan.work_data = cpu_ctx->work_data;
 
     ggml_graph_compute(cgraph, &cplan);
+    return true;
 }
 
 static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
diff --git a/ggml-backend.h b/ggml-backend.h
index a9d2fdd..85ff67b 100644
--- a/ggml-backend.h
+++ b/ggml-backend.h
@@ -58,7 +58,7 @@ extern "C" {
 
     GGML_API void ggml_backend_graph_plan_free   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
     GGML_API bool ggml_backend_supports_op       (ggml_backend_t backend, const struct ggml_tensor * op);
 
     // tensor copy between different backends
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 52d3cc6..10c2161 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -9910,7 +9910,7 @@ static void ggml_backend_cuda_graph_plan_compute(ggml_backend_t backend, ggml_ba
     UNUSED(plan);
 }
 
-static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_context_cuda * cuda_ctx = (ggml_backend_context_cuda *)backend->context;
 
     ggml_cuda_set_main_device(cuda_ctx->device);
@@ -9967,6 +9967,8 @@ static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph
     }
 
     UNUSED(backend);
+
+    return true;
 }
 
 static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
diff --git a/ggml-metal.h b/ggml-metal.h
index b5e02b6..c4b7325 100644
--- a/ggml-metal.h
+++ b/ggml-metal.h
@@ -87,7 +87,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
 
 // same as ggml_graph_compute but uses Metal
 // creates gf->n_threads command buffers in parallel
-void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
+bool ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
 
 //
 // backend API
diff --git a/ggml-metal.m b/ggml-metal.m
index 7aa92c1..55cc1a8 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -977,7 +977,7 @@ static bool ggml_metal_supports_op(const struct ggml_tensor * op) {
             return false;
         }
     }
-void ggml_metal_graph_compute(
+bool ggml_metal_graph_compute(
         struct ggml_metal_context * ctx,
                struct ggml_cgraph * gf) {
     @autoreleasepool {
@@ -2405,10 +2405,11 @@ void ggml_metal_graph_compute(
         MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
         if (status != MTLCommandBufferStatusCompleted) {
             GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
-            GGML_ASSERT(false);
+            return false;
         }
     }
 
+    return true;
 }
 }
 
@@ -2688,10 +2689,10 @@ static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffer_type(ggm
     UNUSED(backend);
 }
 
-static void ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;
 
-    ggml_metal_graph_compute(metal_ctx, cgraph);
+    return ggml_metal_graph_compute(metal_ctx, cgraph);
 }
 
 static bool ggml_backend_metal_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
diff --git a/whisper.cpp b/whisper.cpp
index 5c64410..4f216a9 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -152,7 +152,7 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text
 // ggml helpers
 //
 
-static void ggml_graph_compute_helper(
+static bool ggml_graph_compute_helper(
           struct ggml_cgraph * graph,
         std::vector<uint8_t> & buf,
                          int   n_threads,
@@ -168,10 +168,10 @@ static void ggml_graph_compute_helper(
         plan.work_data = buf.data();
     }
 
-    ggml_graph_compute(graph, &plan);
+    return ggml_graph_compute(graph, &plan);
 }
 
-static void ggml_graph_compute_helper(
+static bool ggml_graph_compute_helper(
       struct ggml_backend * backend,
        struct ggml_cgraph * graph,
                      int   n_threads) {
@@ -183,7 +183,7 @@ static void ggml_graph_compute_helper(
         ggml_backend_metal_set_n_cb(backend, n_threads);
     }
 #endif
-    ggml_backend_graph_compute(backend, graph);
+    return ggml_backend_graph_compute(backend, graph);
 }
 
 // faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
@@ -2103,7 +2103,9 @@ static bool whisper_encode_internal(
         ggml_allocr_alloc_graph(alloc, gf);
 
         if (!whisper_encode_external(wstate)) {
-            ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+            if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+                return false;
+            }
         }
     }
 
@@ -2117,7 +2119,9 @@ static bool whisper_encode_internal(
 
         ggml_allocr_alloc_graph(alloc, gf);
 
-        ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+            return false;
+        }
     }
 
     // cross
@@ -2130,7 +2134,9 @@ static bool whisper_encode_internal(
 
         ggml_allocr_alloc_graph(alloc, gf);
 
-        ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+            return false;
+        }
     }
 
     wstate.t_encode_us += ggml_time_us() - t_start_us;
@@ -2552,7 +2558,9 @@ static bool whisper_decode_internal(
 
         logits = gf->nodes[gf->n_nodes - 1];
 
-        ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+            return false;
+        }
     }
 
     logits_out.resize(n_tokens*n_vocab);
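
Usage note: after this patch, ggml_backend_graph_compute() and
ggml_metal_graph_compute() report failure by returning false instead of
aborting the process through GGML_ASSERT, and whisper_encode_internal() /
whisper_decode_internal() propagate that failure to their callers. A minimal
sketch of a downstream caller consuming the new return value (run_graph() is a
hypothetical helper written for illustration, not part of this patch):

    #include <stdio.h>

    #include "ggml.h"
    #include "ggml-backend.h"

    // Hypothetical wrapper: dispatch a graph and surface backend failures
    // (e.g. a Metal command buffer that never reaches
    // MTLCommandBufferStatusCompleted) to the caller instead of crashing.
    static bool run_graph(ggml_backend_t backend, struct ggml_cgraph * gf) {
        if (!ggml_backend_graph_compute(backend, gf)) {
            fprintf(stderr, "run_graph: graph compute failed\n");
            return false;
        }
        return true;
    }

Returning bool keeps the backend interface C-compatible; code that previously
relied on the hard assert should now check every graph_compute call, as the
whisper.cpp hunks above do.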