// whisper.cpp/coreml/whisper-encoder.mm

#if !__has_feature(objc_arc)
#error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
#endif
#import "whisper-encoder.h"
#import "whisper-encoder-impl.h"
#import <CoreML/CoreML.h>
#include <stdlib.h>
#if __cplusplus
extern "C" {
#endif

struct whisper_coreml_context {
    // CFBridgingRetain'ed whisper_encoder_impl instance (the Core ML model)
    const void * data;
};

struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
    NSString * path_model_str = [[NSString alloc] initWithUTF8String:path_model];

    NSURL * url_model = [NSURL fileURLWithPath: path_model_str];

    // select which device to run the Core ML model on
    MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
    //config.computeUnits = MLComputeUnitsCPUAndGPU;
    //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
    config.computeUnits = MLComputeUnitsAll;
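    // MLComputeUnitsAll lets Core ML choose between the CPU, GPU and Apple
    // Neural Engine at load time; the commented-out alternatives above pin
    // the model to specific compute units, which can be useful when
    // debugging or benchmarking a particular device path.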

    // load the Core ML model and retain it outside of ARC so the instance
    // can be stored in a plain C struct
    const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);

    if (data == NULL) {
        return NULL;
    }

    whisper_coreml_context * ctx = new whisper_coreml_context;

    ctx->data = data;

    return ctx;
}

void whisper_coreml_free(struct whisper_coreml_context * ctx) {
    // balance the CFBridgingRetain from whisper_coreml_init
    CFRelease(ctx->data);
    delete ctx;
}

void whisper_coreml_encode(
        const whisper_coreml_context * ctx,
                             int64_t   n_ctx,
                             int64_t   n_mel,
                               float * mel,
                               float * out) {
    // wrap the caller's mel buffer in an MLMultiArray without copying:
    // shape (1, n_mel, n_ctx) with row-major strides, float32
    MLMultiArray * inMultiArray = [
        [MLMultiArray alloc] initWithDataPointer: mel
                                           shape: @[@1, @(n_mel), @(n_ctx)]
                                        dataType: MLMultiArrayDataTypeFloat32
                                         strides: @[@(n_ctx*n_mel), @(n_ctx), @1]
                                     deallocator: nil
                                           error: nil
    ];

    @autoreleasepool {
        // run the encoder and copy the result into the caller's output buffer
        whisper_encoder_implOutput * outCoreML = [(__bridge id) ctx->data predictionFromLogmel_data:inMultiArray error:nil];

        memcpy(out, outCoreML.output.dataPointer, outCoreML.output.count * sizeof(float));
    }
}
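
// Usage sketch (illustrative, not part of the upstream file): one possible
// call sequence from the host code. The model path and the n_ctx/n_mel
// values are assumptions for a Whisper-style encoder (80 mel bins, 3000
// spectrogram frames per 30-second window):
//
//     struct whisper_coreml_context * ctx = whisper_coreml_init("ggml-base-encoder.mlmodelc");
//     if (ctx) {
//         whisper_coreml_encode(ctx, 3000, 80, mel, out);
//         whisper_coreml_free(ctx);
//     }
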
#if __cplusplus
}
#endif