Gauss-Newton Method: Worked Examples in C++
The Gauss-Newton method is one of the common algorithms for nonlinear least-squares problems. Its core idea is to reach the optimal solution by repeatedly reducing the total residual; it is an iterative numerical method rather than a way of obtaining a closed-form (analytical) solution of the objective function.
The idea behind the algorithm is fairly simple:
For the derivation, refer to an explanation of the principle together with a worked example of the principle.
The main idea is to keep updating the parameters in a descent direction until the residual is minimized. Gradient descent? Almost: the step is obtained by solving a linearized least-squares problem rather than by following the raw gradient directly. Math really does matter.
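To make this concrete, here is the update that the code below implements, written in standard notation (a sketch; f is the model, θ the parameter vector, r the residual vector and J the Jacobian of the model with respect to the parameters):

\[
r_i = y_i - f(x_i;\theta), \qquad
J_{ik} = \frac{\partial f(x_i;\theta)}{\partial \theta_k}, \qquad
\Delta\theta = (J^{\top} J)^{-1} J^{\top} r, \qquad
\theta \leftarrow \theta + \Delta\theta .
\]

The iteration stops once the mean squared residual no longer changes appreciably, or after a maximum number of iterations.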
The examples are as follows:
1. y = A*x1 + B*x2^2 with true values A = 2, B = 1; given the data points below, solve for the optimal A and B.
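For this model the entries of the Jacobian are simple (a worked detail, not in the original text):

\[
\frac{\partial y}{\partial A} = x_1, \qquad \frac{\partial y}{\partial B} = x_2^{2},
\]

so each row of the Jacobian is just (x1, x2^2). The code below does not use these analytic expressions; it approximates them numerically in Deriv() with a central difference of step DERIV_STEP = 1e-5.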
#include <cstdio>
#include <cmath>
#include <vector>
#include <opencv2/core/core.hpp>
using namespace std;
using namespace cv;
const double DERIV_STEP = 1e-5;
const int MAX_ITER = 100;
// y = A*x1 + B*x2^2, true values A = 2, B = 1
// data: y  x1  x2
// 3 1 1
// 6 1 2
// 5 2 1
// 1 0 1
// 2 1 0
// 7 3 1
// 11 1 3
void GaussNewton(double(*Func)(const Mat& input, const Mat& input2, const Mat params), // function pointer
const Mat& inputs, const Mat& inputs2, const Mat& outputs, Mat params);
double Deriv(double(*Func)(const Mat& input, const Mat& input2, const Mat params), // function pointer
const Mat& input, const Mat& input2, const Mat params, int n);
// The user defines their function here
double Func(const Mat& input, const Mat& input2, const Mat params);
int main()
{
// For this demo we're going to fit the function
// y = A*x1 + B*x2^2
// There are 2 parameters: A and B
int num_params = 2;
// Observation data: 7 hand-entered points
int total_data = 7;
Mat inputs(total_data, 1, CV_64F);
Mat inputs2(total_data, 1, CV_64F);
Mat outputs(total_data, 1, CV_64F);
// load observation data: x1 values
inputs.at<double>(0, 0) = 1;
inputs.at<double>(1, 0) = 1;
inputs.at<double>(2, 0) = 2;
inputs.at<double>(3, 0) = 0;
inputs.at<double>(4, 0) = 1;
inputs.at<double>(5, 0) = 3;
inputs.at<double>(6, 0) = 1;
// load observation data: x2 values
inputs2.at<double>(0, 0) = 1;
inputs2.at<double>(1, 0) = 2;
inputs2.at<double>(2, 0) = 1;
inputs2.at<double>(3, 0) = 1;
inputs2.at<double>(4, 0) = 0;
inputs2.at<double>(5, 0) = 1;
inputs2.at<double>(6, 0) = 3;
// load observation data: y values (observed outputs)
outputs.at<double>(0, 0) = 3;
outputs.at<double>(1, 0) = 6;
outputs.at<double>(2, 0) = 5;
outputs.at<double>(3, 0) = 1;
outputs.at<double>(4, 0) = 2;
outputs.at<double>(5, 0) = 7;
outputs.at<double>(6, 0) = 11;
// Initial guess for the parameters; it should be close to the true values, otherwise the method can fail for very sensitive functions!
Mat params(num_params, 1, CV_64F);
//init guess
params.at<double>(0, 0) = 0.1;
params.at<double>(1, 0) = 0.1;
GaussNewton(Func, inputs, inputs2, outputs, params);
printf("Parameters from GaussNewton: %f %f\n", params.at<double>(0, 0), params.at<double>(1, 0));
return 0;
}
double Func(const Mat& input, const Mat& input2, const Mat params)
{
// Assumes input is a single row matrix
// Assumes params is a column matrix
double A = params.at<double>(0, 0);
double B = params.at<double>(1, 0);
double x = input.at<double>(0, 0);
double x2 = input2.at<double>(0, 0);
return A * x + B * pow(x2, 2);
}
// Compute the partial derivative of Func with respect to the n-th parameter (the parameters are what we ultimately solve for)
double Deriv(double(*Func)(const Mat& input, const Mat& input2, const Mat params), const Mat& input, const Mat& input2, const Mat params, int n)
{
// Assumes input is a single row matrix
// Returns the derivative of the nth parameter
Mat params1 = params.clone();
Mat params2 = params.clone();
// Use central difference to get derivative
params1.at<double>(n, 0) -= DERIV_STEP;
params2.at<double>(n, 0) += DERIV_STEP;
double p1 = Func(input, input2, params1);
double p2 = Func(input, input2, params2);
double d = (p2 - p1) / (2 * DERIV_STEP);
return d;
}
void GaussNewton(double(*Func)(const Mat& input, const Mat& input2, const Mat params),
const Mat& inputs, const Mat& inputs2, const Mat& outputs, Mat params)
{
int m = inputs.rows;
int n = inputs.cols;
int num_params = params.rows;
Mat r(m, 1, CV_64F); // residual matrix
Mat Jf(m, num_params, CV_64F); // Jacobian of Func()
Mat input(1, n, CV_64F); // single row input
Mat input2(1, n, CV_64F); // single row input
double last_mse = 0;
for (int i = 0; i < MAX_ITER; i++) {
double mse = 0;
for (int j = 0; j < m; j++) {
for (int k = 0; k < n; k++) {// copy the independent variables for data point j
input.at<double>(0, k) = inputs.at<double>(j, k);
input2.at<double>(0, k) = inputs2.at<double>(j, k);
}
r.at<double>(j, 0) = outputs.at<double>(j, 0) - Func(input, input2, params);// residual: observation minus model estimate
mse += r.at<double>(j, 0) * r.at<double>(j, 0);
for (int k = 0; k < num_params; k++) {
Jf.at<double>(j, k) = Deriv(Func, input, input2, params, k);
}
}
mse /= m;
// The difference in mse is very small, so quit
if (fabs(mse - last_mse) < 1e-8) {
break;
}
Mat delta = (Jf.t() * Jf).inv() * Jf.t() * r;
params += delta;
//printf("%d: mse=%f\n", i, mse);
printf("%d %f\n", i, mse);
last_mse = mse;
}
}
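One implementation detail worth flagging: the update line forms (Jf.t() * Jf).inv() explicitly, which can be numerically fragile when the Jacobian columns are nearly dependent. Below is a minimal, self-contained sketch (not part of the original article; the toy Jf and r values are made up purely for illustration) of the alternative using OpenCV's cv::solve with DECOMP_SVD, which returns the least-squares solution of Jf * delta = r without forming the inverse:

#include <cstdio>
#include <opencv2/core/core.hpp>
using namespace cv;

int main()
{
    // Toy 3x2 Jacobian and 3x1 residual vector, values chosen only for illustration
    Mat Jf = (Mat_<double>(3, 2) << 1, 1,
                                    1, 4,
                                    2, 1);
    Mat r  = (Mat_<double>(3, 1) << 3, 6, 5);

    // Explicit normal equations, as in GaussNewton() above
    Mat delta_inv = (Jf.t() * Jf).inv() * Jf.t() * r;

    // Least-squares solve of Jf * delta = r; usually better conditioned
    Mat delta_svd;
    solve(Jf, r, delta_svd, DECOMP_SVD);

    printf("inv: %f %f\n", delta_inv.at<double>(0, 0), delta_inv.at<double>(1, 0));
    printf("svd: %f %f\n", delta_svd.at<double>(0, 0), delta_svd.at<double>(1, 0));
    return 0;
}

Inside GaussNewton() this would amount to replacing the delta line with solve(Jf, r, delta, DECOMP_SVD). To build either program, something along the lines of g++ main.cpp $(pkg-config --cflags --libs opencv4) should work, assuming an OpenCV installation that ships a pkg-config file (the package name may be opencv rather than opencv4 on older setups).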
2. For n data pairs (x1, x2), let E = sum over i of (3 - y_i)^2 with y = A*x1 + B*x2^2. To make E as small as possible, every y_i has to be close to 3, i.e. for each pair (x1, x2) the fitted y should approach 3 as closely as possible.
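Spelled out (a restatement of the objective the code below minimizes):

\[
E(A, B) = \sum_{i=1}^{n} \Bigl(3 - \bigl(A\,x_{1,i} + B\,x_{2,i}^{2}\bigr)\Bigr)^{2},
\]

so each residual is r_i = 3 - (A*x1_i + B*x2_i^2). Note that in the data below every x1 and x2 is very close to 1, which makes the two Jacobian columns (x1 and x2^2) nearly identical; J^T J is then close to singular, the individual values of A and B are only weakly determined, and essentially only their sum A + B ≈ 3 is pinned down by the data.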
#include <cstdio>
#include <cmath>
#include <vector>
#include <opencv2/core/core.hpp>
using namespace std;
using namespace cv;
const double DERIV_STEP = 1e-5;
const int MAX_ITER = 100;
// y = A*x1 + B*x2^2 ; here we fit A and B so that y is as close to 3 as possible for every data point
void GaussNewton(double(*Func)(const Mat& input, const Mat& input2, const Mat params), // function pointer
const Mat& inputs, const Mat& inputs2, const Mat& outputs, Mat params);
double Deriv(double(*Func)(const Mat& input, const Mat& input2, const Mat params), // function pointer
const Mat& input, const Mat& input2, const Mat params, int n);
// The user defines their function here
double Func(const Mat& input, const Mat& input2, const Mat params);
int main()
{
// For this demo we're going to fit the function
// y = A*x1 + B*x2^2
// There are 2 parameters: A and B
int num_params = 2;
// Observation data: 7 hand-entered points
int total_data = 7;
Mat inputs(total_data, 1, CV_64F);
Mat inputs2(total_data, 1, CV_64F);
Mat outputs(total_data, 1, CV_64F);
// load observation data: x1 values
inputs.at<double>(0, 0) = 1.001;
inputs.at<double>(1, 0) = 1.002;
inputs.at<double>(2, 0) = 1.003;
inputs.at<double>(3, 0) = 1.004;
inputs.at<double>(4, 0) = 1.005;
inputs.at<double>(5, 0) = 1.006;
inputs.at<double>(6, 0) = 1.007;
// load observation data: x2 values
inputs2.at<double>(0, 0) = 1.007;
inputs2.at<double>(1, 0) = 1.006;
inputs2.at<double>(2, 0) = 1.005;
inputs2.at<double>(3, 0) = 1.004;
inputs2.at<double>(4, 0) = 1.003;
inputs2.at<double>(5, 0) = 1.002;
inputs2.at<double>(6, 0) = 1.001;
// target outputs: every y should be as close to 3 as possible
outputs.at<double>(0, 0) = 3;
outputs.at<double>(1, 0) = 3;
outputs.at<double>(2, 0) = 3;
outputs.at<double>(3, 0) = 3;
outputs.at<double>(4, 0) = 3;
outputs.at<double>(5, 0) = 3;
outputs.at<double>(6, 0) = 3;
// Initial guess for the parameters; it should be close to the true values, otherwise the method can fail for very sensitive functions!
Mat params(num_params, 1, CV_64F);
//init guess
params.at<double>(0, 0) = 1;
params.at<double>(1, 0) = 1;
GaussNewton(Func, inputs, inputs2, outputs, params);
printf("Parameters from GaussNewton: %f %f\n", params.at<double>(0, 0), params.at<double>(1, 0));
return 0;
}
double Func(const Mat& input, const Mat& input2, const Mat params)
{
// Assumes input is a single row matrix
// Assumes params is a column matrix
double A = params.at<double>(0, 0);
double B = params.at<double>(1, 0);
double x = input.at<double>(0, 0);
double x2 = input2.at<double>(0, 0);
return A * x + B * pow(x2, 2);
}
// Compute the partial derivative of Func with respect to the n-th parameter (the parameters are what we ultimately solve for)
double Deriv(double(*Func)(const Mat& input, const Mat& input2, const Mat params), const Mat& input, const Mat& input2, const Mat params, int n)
{
// Assumes input is a single row matrix; n is 0 or 1 (the parameter index)
// Returns the derivative of the nth parameter
Mat params1 = params.clone();
Mat params2 = params.clone();
// Use central difference to get derivative
params1.at<double>(n, 0) -= DERIV_STEP;
params2.at<double>(n, 0) += DERIV_STEP;
double p1 = Func(input, input2, params1);
double p2 = Func(input, input2, params2);
double d = (p2 - p1) / (2 * DERIV_STEP);
return d;
}
void GaussNewton(double(*Func)(const Mat& input, const Mat& input2, const Mat params),
const Mat& inputs, const Mat& inputs2, const Mat& outputs, Mat params)
{
int m = inputs.rows;
int n = inputs.cols;
int num_params = params.rows;
Mat r(m, 1, CV_64F); // residual matrix
Mat Jf(m, num_params, CV_64F); // Jacobian of Func()
Mat input(1, n, CV_64F); // single row input
Mat input2(1, n, CV_64F); // single row input
double last_mse = 0;
for (int i = 0; i < MAX_ITER; i++) {
double mse = 0;
for (int j = 0; j < m; j++) {
// copy the independent variables for data point j (the inputs are single columns here)
input.at<double>(0, 0) = inputs.at<double>(j, 0);
input2.at<double>(0, 0) = inputs2.at<double>(j, 0);
r.at<double>(j, 0) = outputs.at<double>(j, 0) - Func(input, input2, params);// residual: observation minus model estimate
mse += r.at<double>(j, 0) * r.at<double>(j, 0);
// partial derivatives with respect to A (index 0) and B (index 1)
Jf.at<double>(j, 0) = Deriv(Func, input, input2, params, 0);
Jf.at<double>(j, 1) = Deriv(Func, input, input2, params, 1);
}
mse /= m;
// The difference in mse is very small, so quit
if (fabs(mse - last_mse) < 1e-20) {
break;
}
Mat delta = (Jf.t() * Jf).inv() * Jf.t() * r;
params += delta;
//printf("%d: mse=%f\n", i, mse);
printf("%d %f\n", i, mse);
last_mse = mse;
}
}
Copyright notice: this is an original article by weixin_43151193, released under the CC 4.0 BY-SA license. Please include a link to the original source and this notice when reposting.