125 lines
3.8 KiB
C++
125 lines
3.8 KiB
C++
#ifndef __DENOISER_HPP__
|
|
#define __DENOISER_HPP__
|
|
|
|
#include "ggml_extend.hpp"
|
|
|
|
/*================================================= CompVisDenoiser ==================================================*/
|
|
|
|
// Ref: https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/external.py
|
|
|
|
#define TIMESTEPS 1000
|
|
|
|
struct SigmaSchedule {
|
|
float alphas_cumprod[TIMESTEPS];
|
|
float sigmas[TIMESTEPS];
|
|
float log_sigmas[TIMESTEPS];
|
|
|
|
virtual std::vector<float> get_sigmas(uint32_t n) = 0;
|
|
|
|
float sigma_to_t(float sigma) {
|
|
float log_sigma = std::log(sigma);
|
|
std::vector<float> dists;
|
|
dists.reserve(TIMESTEPS);
|
|
for (float log_sigma_val : log_sigmas) {
|
|
dists.push_back(log_sigma - log_sigma_val);
|
|
}
|
|
|
|
int low_idx = 0;
|
|
for (size_t i = 0; i < TIMESTEPS; i++) {
|
|
if (dists[i] >= 0) {
|
|
low_idx++;
|
|
}
|
|
}
|
|
low_idx = std::min(std::max(low_idx - 1, 0), TIMESTEPS - 2);
|
|
int high_idx = low_idx + 1;
|
|
|
|
float low = log_sigmas[low_idx];
|
|
float high = log_sigmas[high_idx];
|
|
float w = (low - log_sigma) / (low - high);
|
|
w = std::max(0.f, std::min(1.f, w));
|
|
float t = (1.0f - w) * low_idx + w * high_idx;
|
|
|
|
return t;
|
|
}
|
|
|
|
float t_to_sigma(float t) {
|
|
int low_idx = static_cast<int>(std::floor(t));
|
|
int high_idx = static_cast<int>(std::ceil(t));
|
|
float w = t - static_cast<float>(low_idx);
|
|
float log_sigma = (1.0f - w) * log_sigmas[low_idx] + w * log_sigmas[high_idx];
|
|
return std::exp(log_sigma);
|
|
}
|
|
};
|
|
|
|
struct DiscreteSchedule : SigmaSchedule {
|
|
std::vector<float> get_sigmas(uint32_t n) {
|
|
std::vector<float> result;
|
|
|
|
int t_max = TIMESTEPS - 1;
|
|
|
|
if (n == 0) {
|
|
return result;
|
|
} else if (n == 1) {
|
|
result.push_back(t_to_sigma((float)t_max));
|
|
result.push_back(0);
|
|
return result;
|
|
}
|
|
|
|
float step = static_cast<float>(t_max) / static_cast<float>(n - 1);
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
float t = t_max - step * i;
|
|
result.push_back(t_to_sigma(t));
|
|
}
|
|
result.push_back(0);
|
|
return result;
|
|
}
|
|
};
|
|
|
|
struct KarrasSchedule : SigmaSchedule {
|
|
std::vector<float> get_sigmas(uint32_t n) {
|
|
// These *COULD* be function arguments here,
|
|
// but does anybody ever bother to touch them?
|
|
float sigma_min = 0.1f;
|
|
float sigma_max = 10.f;
|
|
float rho = 7.f;
|
|
|
|
std::vector<float> result(n + 1);
|
|
|
|
float min_inv_rho = pow(sigma_min, (1.f / rho));
|
|
float max_inv_rho = pow(sigma_max, (1.f / rho));
|
|
for (uint32_t i = 0; i < n; i++) {
|
|
// Eq. (5) from Karras et al 2022
|
|
result[i] = pow(max_inv_rho + (float)i / ((float)n - 1.f) * (min_inv_rho - max_inv_rho), rho);
|
|
}
|
|
result[n] = 0.;
|
|
return result;
|
|
}
|
|
};
|
|
|
|
struct Denoiser {
|
|
std::shared_ptr<SigmaSchedule> schedule = std::make_shared<DiscreteSchedule>();
|
|
virtual std::vector<float> get_scalings(float sigma) = 0;
|
|
};
|
|
|
|
struct CompVisDenoiser : public Denoiser {
|
|
float sigma_data = 1.0f;
|
|
|
|
std::vector<float> get_scalings(float sigma) {
|
|
float c_out = -sigma;
|
|
float c_in = 1.0f / std::sqrt(sigma * sigma + sigma_data * sigma_data);
|
|
return {c_out, c_in};
|
|
}
|
|
};
|
|
|
|
struct CompVisVDenoiser : public Denoiser {
|
|
float sigma_data = 1.0f;
|
|
|
|
std::vector<float> get_scalings(float sigma) {
|
|
float c_skip = sigma_data * sigma_data / (sigma * sigma + sigma_data * sigma_data);
|
|
float c_out = -sigma * sigma_data / std::sqrt(sigma * sigma + sigma_data * sigma_data);
|
|
float c_in = 1.0f / std::sqrt(sigma * sigma + sigma_data * sigma_data);
|
|
return {c_skip, c_out, c_in};
|
|
}
|
|
};
|
|
|
|
#endif // __DENOISER_HPP__
|