libpca is a C++ library for Principal Component Analysis that builds upon Armadillo, a linear algebra library.
I am having a problem with it, though. I am comparing its output with the example given by Lindsay Smith in his great tutorial on PCA. When I retrieve the first principal component, I get the same values as Smith does in his tutorial, but with all of the signs inverted. For the second principal component, both the signs and the values are correct.
Does anyone know why this is?
Code:
#include "pca.h"
#include <iostream>
using namespace std;
// Feeds ten two-value records into stats::pca, solves, and prints the first
// two principal components, one per line, values separated by single spaces.
//
// argc/argv are accepted but unused.
// Returns 0 on completion.
int main(int argc, char** argv) {
    // NOTE(review): the constructor argument is presumably the number of
    // variables per record (each record below has two values) -- confirm
    // against the libpca documentation.
    stats::pca pca(2);

    // Flat table of values consumed as consecutive pairs. Automatic storage
    // replaces the former `new double[20]`, which was released with scalar
    // `delete` instead of `delete[]` (undefined behaviour).
    const double elements[] = {2.5, 2.4, 0.5, 0.7, 2.2, 2.9, 1.9, 2.2, 3.1, 3.0,
                               2.3, 2.7, 2,   1.6, 1,   1.1, 1.5, 1.6, 1.1, 0.9};
    const int value_count = sizeof(elements) / sizeof(elements[0]);

    // Step by two so each iteration builds exactly one two-value record.
    for (int i = 0; i + 1 < value_count; i += 2) {
        std::vector<double> record;
        record.push_back(elements[i]);
        record.push_back(elements[i + 1]);
        pca.add_record(record);
    }

    pca.solve();

    // Print principal components 0 and 1 in that order; each value is
    // followed by a space and each component ends with a newline.
    for (int component = 0; component < 2; ++component) {
        const std::vector<double> principal = pca.get_principal(component);
        for (const double value : principal)
            std::cout << value << " ";
        std::cout << std::endl;
    }

    return 0;
}
Output:
0.82797 -1.77758 0.992197 0.27421 1.6758 0.912949 -0.0991094 -1.14457 -0.438046 -1.22382
-0.175115 0.142857 0.384375 0.130417 -0.209498 0.175282 -0.349825 0.0464173 0.0177646 -0.162675