From d05b7ee90e22f5b927449051fb6f2ca3adc85b61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C6=A1n=20Phan=20Trung?= Date: Fri, 12 Jan 2024 19:11:04 +0700 Subject: [PATCH] models : make all scripts to be POSIX Compliant (#1725) * download-coreml-model: make it POSIX-compliant * download-ggml-model: posix compliant (2nd) * minor edit * forgot to add newline * generate-coreml-interface: far more straightforward * generate-coreml-model: done with the posix thingy * typo * Update download-ggml-model.sh * fix * fix typo * another fix * Update download-coreml-model.sh * Update download-ggml-model.sh * Update download-coreml-model.sh --- models/download-coreml-model.sh | 50 ++++++++--------- models/download-ggml-model.sh | 87 +++++++++++++++-------------- models/generate-coreml-interface.sh | 4 +- models/generate-coreml-model.sh | 20 +++---- 4 files changed, 81 insertions(+), 80 deletions(-) diff --git a/models/download-coreml-model.sh b/models/download-coreml-model.sh index 9e67a15..83f2b23 100755 --- a/models/download-coreml-model.sh +++ b/models/download-coreml-model.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # This script downloads Whisper model files that have already been converted to Core ML format. # This way you don't have to convert them yourself. 
@@ -7,32 +7,32 @@ src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml" pfx="resolve/main/ggml" # get the path of this script -function get_script_path() { +get_script_path() { if [ -x "$(command -v realpath)" ]; then - echo "$(dirname $(realpath $0))" + dirname "$(realpath "$0")" else - local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)" - echo "$ret" + _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)" + echo "$_ret" fi } models_path="$(get_script_path)" # Whisper models -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" ) +models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3" # list available models -function list_models { - printf "\n" - printf " Available models:" - for model in "${models[@]}"; do - printf " $model" - done - printf "\n\n" +list_models() { + printf "\n" + printf " Available models:" + for model in $models; do + printf " %s" "$model" + done + printf "\n\n" } if [ "$#" -ne 1 ]; then - printf "Usage: $0 \n" + printf "Usage: %s \n" "$0" list_models exit 1 @@ -40,8 +40,8 @@ fi model=$1 -if [[ ! " ${models[@]} " =~ " ${model} " ]]; then - printf "Invalid model: $model\n" +if ! echo "$models" | grep -q -w "$model"; then + printf "Invalid model: %s\n" "$model" list_models exit 1 @@ -49,19 +49,19 @@ fi # download Core ML model -printf "Downloading Core ML model $model from '$src' ...\n" +printf "Downloading Core ML model %s from '%s' ...\n" "$model" "$src" -cd $models_path +cd "$models_path" || exit if [ -f "ggml-$model.mlmodel" ]; then - printf "Model $model already exists. Skipping download.\n" + printf "Model %s already exists. 
Skipping download.\n" "$model" exit 0 fi if [ -x "$(command -v wget)" ]; then - wget --quiet --show-progress -O ggml-$model.mlmodel $src/$pfx-$model.mlmodel + wget --quiet --show-progress -O ggml-"$model".mlmodel $src/$pfx-"$model".mlmodel elif [ -x "$(command -v curl)" ]; then - curl -L --output ggml-$model.mlmodel $src/$pfx-$model.mlmodel + curl -L --output ggml-"$model".mlmodel $src/$pfx-"$model".mlmodel else printf "Either wget or curl is required to download models.\n" exit 1 @@ -69,14 +69,14 @@ fi if [ $? -ne 0 ]; then - printf "Failed to download Core ML model $model \n" + printf "Failed to download Core ML model %s \n" "$model" printf "Please try again later or download the original Whisper model files and convert them yourself.\n" exit 1 fi -printf "Done! Model '$model' saved in 'models/ggml-$model.mlmodel'\n" +printf "Done! Model '%s' saved in 'models/ggml-%s.mlmodel'\n" "$model" "$model" printf "Run the following command to compile it:\n\n" -printf " $ xcrun coremlc compile ./models/ggml-$model.mlmodel ./models\n\n" +printf " $ xcrun coremlc compile ./models/ggml-%s.mlmodel ./models\n\n" "$model" printf "You can now use it like this:\n\n" -printf " $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n" +printf " $ ./main -m models/ggml-%s.bin -f samples/jfk.wav\n" "$model" printf "\n" diff --git a/models/download-ggml-model.sh b/models/download-ggml-model.sh index c976d2f..74dece9 100755 --- a/models/download-ggml-model.sh +++ b/models/download-ggml-model.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # This script downloads Whisper model files that have already been converted to ggml format. # This way you don't have to convert them yourself. 
@@ -10,54 +10,52 @@ src="https://huggingface.co/ggerganov/whisper.cpp" pfx="resolve/main/ggml" # get the path of this script -function get_script_path() { +get_script_path() { if [ -x "$(command -v realpath)" ]; then - echo "$(dirname "$(realpath "$0")")" + dirname "$(realpath "$0")" else - local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)" - echo "$ret" + _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)" + echo "$_ret" fi } models_path="${2:-$(get_script_path)}" # Whisper models -models=( - "tiny.en" - "tiny" - "tiny-q5_1" - "tiny.en-q5_1" - "base.en" - "base" - "base-q5_1" - "base.en-q5_1" - "small.en" - "small.en-tdrz" - "small" - "small-q5_1" - "small.en-q5_1" - "medium" - "medium.en" - "medium-q5_0" - "medium.en-q5_0" - "large-v1" - "large-v2" - "large-v3" - "large-v3-q5_0" -) +models="tiny.en +tiny +tiny-q5_1 +tiny.en-q5_1 +base.en +base +base-q5_1 +base.en-q5_1 +small.en +small.en-tdrz +small +small-q5_1 +small.en-q5_1 +medium +medium.en +medium-q5_0 +medium.en-q5_0 +large-v1 +large-v2 +large-v3 +large-v3-q5_0" # list available models -function list_models { +list_models() { printf "\n" printf " Available models:" - for model in "${models[@]}"; do - printf " $model" + for model in $models; do + printf " %s" "$model" done printf "\n\n" } if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then - printf "Usage: $0 [models_path]\n" + printf "Usage: %s [models_path]\n" "$0" list_models exit 1 @@ -65,34 +63,36 @@ fi model=$1 -if [[ ! " ${models[@]} " =~ " ${model} " ]]; then - printf "Invalid model: $model\n" +if ! 
echo "$models" | grep -q -w "$model"; then + printf "Invalid model: %s\n" "$model" list_models exit 1 fi # check if model contains `tdrz` and update the src and pfx accordingly -if [[ $model == *"tdrz"* ]]; then +if echo "$model" | grep -q "tdrz"; then src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp" pfx="resolve/main/ggml" fi +# tdrz models are fetched from the tinydiarize repository configured above + # download ggml model -printf "Downloading ggml model $model from '$src' ...\n" +printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src" -cd "$models_path" +cd "$models_path" || exit if [ -f "ggml-$model.bin" ]; then - printf "Model $model already exists. Skipping download.\n" + printf "Model %s already exists. Skipping download.\n" "$model" exit 0 fi if [ -x "$(command -v wget)" ]; then - wget --no-config --quiet --show-progress -O ggml-$model.bin $src/$pfx-$model.bin + wget --no-config --quiet --show-progress -O ggml-"$model".bin $src/$pfx-"$model".bin elif [ -x "$(command -v curl)" ]; then - curl -L --output ggml-$model.bin $src/$pfx-$model.bin + curl -L --output ggml-"$model".bin $src/$pfx-"$model".bin else printf "Either wget or curl is required to download models.\n" exit 1 @@ -100,12 +100,13 @@ fi if [ $? -ne 0 ]; then - printf "Failed to download ggml model $model \n" + printf "Failed to download ggml model %s \n" "$model" printf "Please try again later or download the original Whisper model files and convert them yourself.\n" exit 1 fi -printf "Done! Model '$model' saved in '$models_path/ggml-$model.bin'\n" + +printf "Done! 
Model '%s' saved in '%s/ggml-%s.bin'\n" "$model" "$models_path" "$model" printf "You can now use it like this:\n\n" -printf " $ ./main -m $models_path/ggml-$model.bin -f samples/jfk.wav\n" +printf " $ ./main -m %s/ggml-%s.bin -f samples/jfk.wav\n" "$models_path" "$model" printf "\n" diff --git a/models/generate-coreml-interface.sh b/models/generate-coreml-interface.sh index 553d5f6..b205eb1 100755 --- a/models/generate-coreml-interface.sh +++ b/models/generate-coreml-interface.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # # This generates: # - coreml/whisper-encoder-impl.h and coreml/whisper-encoder-impl.m @@ -6,7 +6,7 @@ # wd=$(dirname "$0") -cd "$wd/../" +cd "$wd/../" || exit python3 models/convert-whisper-to-coreml.py --model tiny.en diff --git a/models/generate-coreml-model.sh b/models/generate-coreml-model.sh index cb8be6d..8f96fde 100755 --- a/models/generate-coreml-model.sh +++ b/models/generate-coreml-model.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # Usage: ./generate-coreml-model.sh if [ $# -eq 0 ]; then @@ -6,7 +6,7 @@ if [ $# -eq 0 ]; then echo "Usage for Whisper models: ./generate-coreml-model.sh " echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 " exit 1 -elif [[ "$1" == "-h5" && $# != 3 ]]; then +elif [ "$1" = "-h5" ] && [ $# != 3 ]; then echo "No model name and model path supplied for a HuggingFace model" echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 " exit 1 @@ -15,20 +15,20 @@ fi mname="$1" wd=$(dirname "$0") -cd "$wd/../" +cd "$wd/../" || exit -if [[ $mname == "-h5" ]]; then +if [ "$mname" = "-h5" ]; then mname="$2" mpath="$3" - echo $mpath - python3 models/convert-h5-to-coreml.py --model-name $mname --model-path $mpath --encoder-only True + echo "$mpath" + python3 models/convert-h5-to-coreml.py --model-name "$mname" --model-path "$mpath" --encoder-only True else - python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True --optimize-ane True + python3 models/convert-whisper-to-coreml.py 
--model "$mname" --encoder-only True --optimize-ane True fi -xcrun coremlc compile models/coreml-encoder-${mname}.mlpackage models/ -rm -rf models/ggml-${mname}-encoder.mlmodelc -mv -v models/coreml-encoder-${mname}.mlmodelc models/ggml-${mname}-encoder.mlmodelc +xcrun coremlc compile models/coreml-encoder-"${mname}".mlpackage models/ +rm -rf models/ggml-"${mname}"-encoder.mlmodelc +mv -v models/coreml-encoder-"${mname}".mlmodelc models/ggml-"${mname}"-encoder.mlmodelc # TODO: decoder (sometime in the future maybe) #xcrun coremlc compile models/whisper-decoder-${mname}.mlpackage models/