feat: add hipBlas support (#94)

pull/148/head master-c6071fa
旺旺碎冰冰 2024-01-14 11:53:42 +08:00 committed by GitHub
parent 5c614e4bc2
commit c6071fa82f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 113 additions and 2 deletions

2
.gitmodules vendored
View File

@ -1,3 +1,3 @@
[submodule "ggml"]
path = ggml
url = https://github.com/leejet/ggml.git
url = https://github.com/ggerganov/ggml.git

View File

@ -25,6 +25,7 @@ endif()
# User-facing build switches. Each backend option defaults to OFF; the
# examples follow SD_STANDALONE.
#option(SD_BUILD_TESTS "sd: build tests" ${SD_STANDALONE})
option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
option(SD_CUBLAS "sd: cuda backend" OFF)
option(SD_HIPBLAS "sd: rocm backend" OFF)
option(SD_METAL "sd: metal backend" OFF)
option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF)
# SD_FAST_SOFTMAX is also forwarded to ggml when SD_HIPBLAS is enabled, so the
# help text names both GPU backends.
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, non-deterministic (the same seed may not always generate the same image), cuda/rocm only" OFF)
@ -46,6 +47,15 @@ if(SD_METAL)
add_definitions(-DSD_USE_METAL)
endif()
# ROCm / hipBLAS backend selection.
if(SD_HIPBLAS)
    message("Use HIPBLAS as backend stable-diffusion")

    # The HIP build defines the same SD_USE_CUBLAS macro as the CUDA build
    # (presumably so it shares the CUDA code path -- confirm against the sources).
    add_definitions(-DSD_USE_CUBLAS)

    # Ask the ggml subproject to build its hipBLAS backend.
    set(GGML_HIPBLAS ON)

    # Forward the fast-softmax switch to ggml when the user requested it.
    if(SD_FAST_SOFTMAX)
        set(GGML_CUDA_FAST_SOFTMAX ON)
    endif()
endif()
if(SD_FLASH_ATTN)
message("Use Flash Attention for memory optimization")
add_definitions(-DSD_USE_FLASH_ATTENTION)
@ -67,6 +77,10 @@ endif()
# Default policy CMP0077 to NEW for subprojects that do not set it themselves,
# so that option() calls there honor normal variables that are already set
# instead of overriding them.
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
# Define GGML_MAX_NAME=128 for this directory and the subdirectories added below.
# see https://github.com/ggerganov/ggml/pull/682
add_definitions(-DGGML_MAX_NAME=128)
# deps
add_subdirectory(ggml)

View File

@ -117,6 +117,17 @@ cmake .. -DSD_CUBLAS=ON
cmake --build . --config Release
```
##### Using HipBLAS
This provides BLAS acceleration using the ROCm cores of your AMD GPU. Make sure to have the ROCm toolkit installed.
Windows users: refer to [docs/hipBLAS_on_Windows.md](docs/hipBLAS_on_Windows.md) for a comprehensive guide.
```
cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
cmake --build . --config Release
```
##### Using Metal
Using Metal makes the computation run on the GPU. Currently, there are some issues with Metal when performing operations on very large matrices, making it highly inefficient at the moment. Performance improvements are expected in the near future.

View File

@ -0,0 +1,85 @@
# Using hipBLAS on Windows
To get hipBLAS in `stable-diffusion.cpp` working on Windows, go through this guide section by section.
## Build Tools for Visual Studio 2022
Skip this step if you already have Build Tools installed.
To install Build Tools, go to [Visual Studio Downloads](https://visualstudio.microsoft.com/vs/), download `Visual Studio 2022 and other Products` and run the installer.
## CMake
Skip this step if you already have CMake installed: running `cmake --version` should output `cmake version x.y.z`.
Download latest `Windows x64 Installer` from [Download | CMake](https://cmake.org/download/) and run it.
## ROCm
Skip this step if you already have ROCm installed.
The [validation tools](https://rocm.docs.amd.com/en/latest/reference/validation_tools.html) are not supported on Windows, so you need to confirm your `ROCm` version yourself.
Fortunately, `AMD` provides complete documentation that you can follow to install [ROCm](https://rocm.docs.amd.com/en/latest/deploy/windows/quick_start.html).
>**If you encounter [AMD ROCm Windows Installation Error 215](https://github.com/RadeonOpenCompute/ROCm/issues/2363), don't worry: ROCm has been installed correctly and only the Visual Studio plugin installation failed, so the error can be ignored.**
Next, set the compiler environment variables to the `ROCm` clang binaries before running cmake.
If you followed the official tutorial and did not change the ROCm install path, the binaries are most likely located in `C:\Program Files\AMD\ROCm\5.5\bin`.
This is what I use to select clang:
```Commandline
set CC=C:\Program Files\AMD\ROCm\5.5\bin\clang.exe
set CXX=C:\Program Files\AMD\ROCm\5.5\bin\clang++.exe
```
## Ninja
Skip this step if you already have Ninja installed: running `ninja --version` should output `1.11.1`.
Download the latest `ninja-win.zip` from the [GitHub Releases Page](https://github.com/ninja-build/ninja/releases/tag/v1.11.1) and unzip it. Then set an environment variable that points to it. I unzipped it to `C:\Program Files\ninja`, so I set it like this:
```Commandline
set ninja=C:\Program Files\ninja\ninja.exe
```
## Building stable-diffusion.cpp
The differences from the regular CPU build are the options `-DSD_HIPBLAS=ON`,
`-G "Ninja"`, `-DCMAKE_C_COMPILER=clang`, `-DCMAKE_CXX_COMPILER=clang++`, and `-DAMDGPU_TARGETS=gfx1100`.
>**Notice**: check the `clang` and `clang++` information:
```Commandline
clang --version
clang++ --version
```
If the output looks like this, you can continue:
```
clang version 17.0.0 (git@github.amd.com:Compute-Mirrors/llvm-project e3201662d21c48894f2156d302276eb1cf47c7be)
Target: x86_64-pc-windows-msvc
Thread model: posix
InstalledDir: C:\Program Files\AMD\ROCm\5.5\bin
```
```
clang version 17.0.0 (git@github.amd.com:Compute-Mirrors/llvm-project e3201662d21c48894f2156d302276eb1cf47c7be)
Target: x86_64-pc-windows-msvc
Thread model: posix
InstalledDir: C:\Program Files\AMD\ROCm\5.5\bin
```
>**Notice** that `gfx1100` is the architecture of my GPU; change it to match your own GPU. See [LLVM Target](https://rocm.docs.amd.com/en/latest/release/windows_support.html#windows-supported-gpus) to find your architecture.
My GPU is an AMD Radeon™ RX 7900 XTX Graphics, so I set it to `gfx1100`.
option:
```commandline
mkdir build
cd build
cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
cmake --build . --config Release
```
If everything went OK, `build\bin\sd.exe` file should appear.

2
ggml

@ -1 +1 @@
Subproject commit 5e449697f0e9e4c3dff7e66e31bcce37a7517a1b
Subproject commit 2f3b12fbd6cf4cb41ad4c8fdfd65e937f5c92093

View File

@ -71,6 +71,7 @@ enum sd_type_t {
SD_TYPE_Q5_K = 13,
SD_TYPE_Q6_K = 14,
SD_TYPE_Q8_K = 15,
SD_TYPE_IQ2_XXS = 16,
SD_TYPE_I8,
SD_TYPE_I16,
SD_TYPE_I32,