Implementing the Adam and RMSProp Optimization Algorithms in C++
Below are practical examples and resources for implementing the Adam and RMSProp optimization algorithms in C++, covering several scenarios and applications. Due to space constraints, complete programs are not reproduced for every case; instead, the key implementation snippets, library usage patterns, and learning resources are provided.
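For reference, the implementations below follow the standard update rules. For Adam (learning rate \(\alpha\), decay rates \(\beta_1, \beta_2\), stability constant \(\epsilon\)):

m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t
v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2
\hat{m}_t = \frac{m_t}{1 - \beta_1^t}, \qquad \hat{v}_t = \frac{v_t}{1 - \beta_2^t}
\theta_t = \theta_{t-1} - \alpha \frac{\hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}

For RMSProp (decay rate \(\rho\)):

E[g^2]_t = \rho \, E[g^2]_{t-1} + (1 - \rho) g_t^2, \qquad \theta_t = \theta_{t-1} - \alpha \frac{g_t}{\sqrt{E[g^2]_t} + \epsilon}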
Basic Adam Optimizer Implementation
#include <vector>
#include <cmath>

class AdamOptimizer {
public:
    AdamOptimizer(float lr = 0.001f, float beta1 = 0.9f, float beta2 = 0.999f, float eps = 1e-8f)
        : learning_rate(lr), beta1(beta1), beta2(beta2), epsilon(eps), t(0) {}

    void update(std::vector<float>& params, const std::vector<float>& grads) {
        t++;
        // Lazily allocate the moment buffers on the first call
        if (m.empty()) {
            m.resize(params.size(), 0.0f);
            v.resize(params.size(), 0.0f);
        }
        for (size_t i = 0; i < params.size(); ++i) {
            // Exponential moving averages of the gradient and its square
            m[i] = beta1 * m[i] + (1 - beta1) * grads[i];
            v[i] = beta2 * v[i] + (1 - beta2) * grads[i] * grads[i];
            // Bias correction for the zero-initialized moments
            float m_hat = m[i] / (1 - std::pow(beta1, t));
            float v_hat = v[i] / (1 - std::pow(beta2, t));
            params[i] -= learning_rate * m_hat / (std::sqrt(v_hat) + epsilon);
        }
    }

private:
    float learning_rate, beta1, beta2, epsilon;
    std::vector<float> m, v;  // first- and second-moment estimates
    int t;                    // timestep counter for bias correction
};
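A minimal usage sketch (the quadratic objective, step count, and learning rate here are illustrative choices, not from the original article): minimizing f(x) = x^2 with the class above.

#include <cstdio>

int main() {
    AdamOptimizer opt(0.01f);              // learning rate chosen for this toy problem
    std::vector<float> params = {5.0f};    // start at x = 5
    for (int step = 0; step < 2000; ++step) {
        std::vector<float> grads = {2.0f * params[0]};  // d/dx of x^2
        opt.update(params, grads);
    }
    std::printf("x after optimization: %f\n", params[0]);  // driven close to 0
    return 0;
}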
Basic RMSProp Implementation
class RMSPropOptimizer {
public:
    RMSPropOptimizer(float lr = 0.001f, float decay = 0.9f, float eps = 1e-8f)
        : learning_rate(lr), decay_rate(decay), epsilon(eps) {}

    void update(std::vector<float>& params, const std::vector<float>& grads) {
        // Lazily allocate the running average of squared gradients
        if (cache.empty()) {
            cache.resize(params.size(), 0.0f);
        }
        for (size_t i = 0; i < params.size(); ++i) {
            // Exponential moving average of squared gradients
            cache[i] = decay_rate * cache[i] + (1 - decay_rate) * grads[i] * grads[i];
            // Scale the step by the root of the running average
            params[i] -= learning_rate * grads[i] / (std::sqrt(cache[i]) + epsilon);
        }
    }

private:
    float learning_rate, decay_rate, epsilon;
    std::vector<float> cache;  // running average of squared gradients
};
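Since RMSPropOptimizer exposes the same update() signature as AdamOptimizer, it drops into the toy loop above unchanged; a quick sketch (again with illustrative values):

    RMSPropOptimizer opt(0.01f);           // same interface as AdamOptimizer
    std::vector<float> params = {5.0f};
    for (int step = 0; step < 2000; ++step) {
        std::vector<float> grads = {2.0f * params[0]};
        opt.update(params, grads);         // params moves toward the minimum at 0
    }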
Matrix Optimization with the Eigen Library
#include <Eigen/Dense>
#include <cmath>

// Note: the static state ties this function to a single parameter matrix of
// fixed size; a production version would keep m, v, and t per parameter.
void AdamWithEigen(Eigen::MatrixXf& params, const Eigen::MatrixXf& grads) {
    static Eigen::MatrixXf m = Eigen::MatrixXf::Zero(params.rows(), params.cols());
    static Eigen::MatrixXf v = Eigen::MatrixXf::Zero(params.rows(), params.cols());
    static int t = 0;
    const float lr = 0.001f, beta1 = 0.9f, beta2 = 0.999f, eps = 1e-8f;

    t++;
    m = beta1 * m + (1 - beta1) * grads;
    v = beta2 * v + (1 - beta2) * grads.array().square().matrix();
    // Bias-corrected moment estimates
    Eigen::MatrixXf m_hat = m / (1.0f - static_cast<float>(std::pow(beta1, t)));
    Eigen::MatrixXf v_hat = v / (1.0f - static_cast<float>(std::pow(beta2, t)));
    // Element-wise update; use .array() on params so matrix and array expressions are not mixed
    params.array() -= lr * m_hat.array() / (v_hat.array().sqrt() + eps);
}
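A quick call sketch (the matrix size and objective are made up for illustration): minimizing the squared Frobenius norm of a 2x2 matrix, whose gradient is simply 2 * W.

int main() {
    Eigen::MatrixXf W = Eigen::MatrixXf::Constant(2, 2, 3.0f);  // start away from the minimum
    for (int step = 0; step < 5000; ++step) {
        Eigen::MatrixXf grad = 2.0f * W;   // gradient of ||W||_F^2
        AdamWithEigen(W, grad);
    }
    // W should now be close to the zero matrix
    return 0;
}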
PyTorch C++ API (LibTorch) Example
#include <torch/torch.h>
#include <cmath>

// Hand-rolled Adam step on a single tensor (static state: single-parameter use only)
torch::Tensor adam_update(torch::Tensor& param, const torch::Tensor& grad) {
    static torch::Tensor m = torch::zeros_like(param);
    static torch::Tensor v = torch::zeros_like(param);
    static int64_t t = 0;
    t++;
    // Moment estimates with beta1 = 0.9, beta2 = 0.999
    m = 0.9 * m + (1 - 0.9) * grad;
    v = 0.999 * v + (1 - 0.999) * grad.pow(2);
    // Bias correction
    auto m_hat = m / (1 - std::pow(0.9, t));
    auto v_hat = v / (1 - std::pow(0.999, t));
    // Parameter update with lr = 0.001 and eps = 1e-8
    param -= 0.001 * m_hat / (v_hat.sqrt() + 1e-8);
    return param;
}
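In practice, LibTorch already ships optimizer classes such as torch::optim::Adam and torch::optim::RMSprop, so hand-rolling the update is mainly instructive. A minimal sketch using the built-in Adam optimizer (the linear model, batch size, and random data are placeholders, not from the original text):

#include <torch/torch.h>

int main() {
    // A tiny linear model used only to demonstrate the optimizer API
    auto model = torch::nn::Linear(10, 1);
    torch::optim::Adam optimizer(model->parameters(), torch::optim::AdamOptions(0.001));

    auto x = torch::randn({32, 10});
    auto y = torch::randn({32, 1});

    for (int epoch = 0; epoch < 100; ++epoch) {
        optimizer.zero_grad();
        auto loss = torch::mse_loss(model->forward(x), y);
        loss.backward();
        optimizer.step();
    }
    return 0;
}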