diff --git a/Makefile b/Makefile
index 284b0f1..4a676f1 100644
--- a/Makefile
+++ b/Makefile
@@ -215,9 +215,9 @@ endif
 
 ifdef WHISPER_CUBLAS
 	ifeq ($(shell expr $(NVCC_VERSION) \>= 11.6), 1)
-		CUDA_ARCH_FLAG=native
+		CUDA_ARCH_FLAG ?= native
 	else
-		CUDA_ARCH_FLAG=all
+		CUDA_ARCH_FLAG ?= all
 	endif
 
 	CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
diff --git a/whisper.cpp b/whisper.cpp
index ba867b0..59d5cff 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -1060,7 +1060,7 @@ static ggml_backend_t whisper_backend_init(const whisper_context_params & params
 #ifdef GGML_USE_CUBLAS
     if (params.use_gpu && ggml_cublas_loaded()) {
         WHISPER_LOG_INFO("%s: using CUDA backend\n", __func__);
-        backend_gpu = ggml_backend_cuda_init(0);
+        backend_gpu = ggml_backend_cuda_init(params.gpu_device);
         if (!backend_gpu) {
             WHISPER_LOG_ERROR("%s: ggml_backend_cuda_init() failed\n", __func__);
         }
@@ -3213,6 +3213,7 @@ int whisper_ctx_init_openvino_encoder(
 struct whisper_context_params whisper_context_default_params() {
     struct whisper_context_params result = {
         /*.use_gpu    =*/ true,
+        /*.gpu_device =*/ 0,
     };
     return result;
 }
diff --git a/whisper.h b/whisper.h
index 3143cea..d571a12 100644
--- a/whisper.h
+++ b/whisper.h
@@ -86,6 +86,7 @@ extern "C" {
 
     struct whisper_context_params {
         bool use_gpu;
+        int  gpu_device;  // CUDA device
     };
 
     typedef struct whisper_token_data {
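
For reference, a minimal sketch of how the new gpu_device field would be used through the existing public API (whisper_context_default_params, whisper_init_from_file_with_params, whisper_free); the model path is a placeholder, and the snippet assumes a build with GGML_USE_CUBLAS so the device index is actually consulted:

    #include "whisper.h"

    int main(void) {
        // start from the defaults, which now include gpu_device = 0
        struct whisper_context_params cparams = whisper_context_default_params();
        cparams.use_gpu    = true;
        cparams.gpu_device = 1;  // select the second CUDA device instead of device 0

        // "models/ggml-base.en.bin" is a placeholder path for illustration
        struct whisper_context * ctx =
            whisper_init_from_file_with_params("models/ggml-base.en.bin", cparams);
        if (ctx == NULL) {
            return 1;
        }

        // ... run transcription as usual ...

        whisper_free(ctx);
        return 0;
    }

Similarly, since CUDA_ARCH_FLAG is now assigned with ?=, it can be pre-set in the environment (e.g. CUDA_ARCH_FLAG=all make WHISPER_CUBLAS=1) rather than always being forced to native/all by the Makefile.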