summaryrefslogtreecommitdiff
path: root/OASIS/c/src/ekd_pca.c
diff options
context:
space:
mode:
Diffstat (limited to 'OASIS/c/src/ekd_pca.c')
-rw-r--r--OASIS/c/src/ekd_pca.c286
1 files changed, 286 insertions, 0 deletions
diff --git a/OASIS/c/src/ekd_pca.c b/OASIS/c/src/ekd_pca.c
new file mode 100644
index 0000000..f76d20e
--- /dev/null
+++ b/OASIS/c/src/ekd_pca.c
@@ -0,0 +1,286 @@
+#include "ekd_pca.h"
+
+void centre_scale(float** input_matrix, uint32_t rows, uint32_t cols, centre_scale_operation_e operation) {
+ float* means = (float*) malloc(cols * sizeof(float));
+ float* std_devs = (float*) malloc(cols * sizeof(float));
+
+ // Iterating column by column
+ for (uint32_t j = 0; j<cols; j++) {
+ float col_sum = 0.0;
+ for (uint32_t i = 0; i<rows; i++){
+ col_sum += input_matrix[i][j];
+ }
+ means[j] = col_sum / (float)rows;
+
+ float variance = 0.0;
+ for (uint32_t i = 0; i<rows; i++) variance += (input_matrix[i][j] - means[j]) * (input_matrix[i][j] - means[j]);
+ std_devs[j] = (variance < 1e-6) ? 1e-6 : sqrtf(variance / (float)rows);
+ }
+
+ // Centering -> subtract means, Scaling -> Divide by std_devs
+ for (uint32_t j=0; j<cols; j++) {
+ for (uint32_t i=0; i<rows; i++) {
+ if (operation == MEAN_UNIT_VARIANCE) {
+ input_matrix[i][j] = (input_matrix[i][j] - means[j]) / std_devs[j];
+ } else if (operation == MEAN) {
+ input_matrix[i][j] = (input_matrix[i][j] - means[j]);
+ }
+ }
+ }
+
+ free(means);
+ free(std_devs);
+}
+
+void covariance(float** input_matrix, uint32_t rows, uint32_t cols, float** covariance_matrix) {
+ float** transposed_matrix = (float**) malloc(cols * sizeof(float*));
+ for (uint32_t i=0; i<cols; i++) transposed_matrix[i] = (float*) malloc(rows * sizeof(float));
+
+ transpose(input_matrix, rows, cols, transposed_matrix);
+
+ matrix_operation(transposed_matrix, cols, rows, input_matrix, rows, cols, covariance_matrix, MM_MULTIPLY_GEMM);
+
+ // Divide by n since we have full population
+ for (uint32_t i=0; i<cols; i++) {
+ for (uint32_t j=0; j<cols; j++) covariance_matrix[i][j] /= rows-1;
+ }
+
+ for (uint32_t i=0; i<cols; i++) free(transposed_matrix[i]);
+ free(transposed_matrix);
+}
+
+void house_holder_vector(float* vector, uint32_t size) {
+
+ // Computes the norm of the input vector
+ float norm_input_vector = 0.0;
+ for (uint32_t i=0; i<size; i++) norm_input_vector += vector[i] * vector[i];
+ norm_input_vector = sqrt(norm_input_vector);
+
+ // Since the rest of output vector already is the input vector, change the first index
+ vector[0] = vector[0] + sign(vector[0]) * norm_input_vector;
+}
+
+void house_holder_transformation(float* house_holder_vector, uint32_t size, float** house_holder_matrix) {
+ float** identity_matrix = (float**) malloc(size * sizeof(float*));
+ for (uint32_t i=0; i<size; i++) identity_matrix[i] = (float*) malloc(size * sizeof(float));
+
+ float** numerator = (float**) malloc(size * sizeof(float*));
+ for (uint32_t i=0; i<size; i++) numerator[i] = (float*) malloc(size * sizeof(float));
+
+ eye(identity_matrix, size);
+
+ float denominator = inner_product(house_holder_vector, house_holder_vector, size);
+
+ outer_product(house_holder_vector, size, numerator);
+
+ for (uint32_t i=0; i<size; i++) {
+ for (uint32_t j=0; j<size; j++) numerator[i][j] = (denominator == 0) ? 0 : (2 / denominator) * numerator[i][j];
+ }
+
+ matrix_operation(identity_matrix, size, size, numerator, size, size, house_holder_matrix, MM_SUBTRACT);
+
+ for (uint32_t i=0; i<size; i++){
+ free(numerator[i]);
+ free(identity_matrix[i]);
+ }
+ free(numerator);
+ free(identity_matrix);
+}
+
+void qr_decomposition(float** input_matrix, uint32_t size, float** resultant_matrix) {
+ float** matrix_Q = (float**) malloc(size * sizeof(float*)); // Has size x size dimensions
+ float** matrix_Q_prime = (float**) malloc(size * sizeof(float*)); // Has size x size dimensions
+ float** matrix_R = (float**) malloc(size * sizeof(float*)); // Has size x size dimensions
+ float** matrix_R_prime = (float**) malloc(size * sizeof(float*)); // Has size x size dimensions
+ for(uint32_t i=0; i<size; i++) {
+ matrix_Q[i] = (float*) malloc(size * sizeof(float));
+ matrix_Q_prime[i] = (float*) malloc(size * sizeof(float));
+ matrix_R[i] = (float*) malloc(size * sizeof(float));
+ matrix_R_prime[i] = (float*) malloc(size * sizeof(float));
+ }
+
+ for (uint32_t i=0; i<size; i++) memcpy(matrix_R[i], input_matrix[i], size * sizeof(float));
+
+ eye(matrix_Q, size);
+
+ for (uint32_t i=0; i<size; i++) {
+ float* current_house_holder_vector = (float*) malloc((size - i) * sizeof(float));
+
+ float** current_house_holder_matrix = (float**) malloc((size - i) * sizeof(float*)); // Has (size-i)x(size-i) dimensions
+ for (uint32_t j=0; j<(size - i); j++) current_house_holder_matrix[j] = (float*) malloc((size - i) * sizeof(float));
+
+ float** matrix_H = (float**) malloc(size * sizeof(float*)); // Has size x size dimensions
+ for (uint32_t j=0; j<size; j++) matrix_H[j] = (float*) malloc(size * sizeof(float));
+
+ eye(matrix_H, size);
+
+ for(uint32_t j=0; j<(size - i); j++) current_house_holder_vector[j] = matrix_R[j+i][i];
+
+ house_holder_vector(current_house_holder_vector, (size - i));
+
+ house_holder_transformation(current_house_holder_vector, (size - i), current_house_holder_matrix);
+
+ // Starting from the ith ith sub matrix
+ for (uint32_t j=i; j<size; j++) {
+ for (uint32_t k=i; k<size; k++) matrix_H[j][k] = current_house_holder_matrix[j-i][k-i];
+ }
+
+ matrix_operation(matrix_H, size, size, matrix_R, size, size, matrix_R_prime, MM_MULTIPLY_GEMM);
+ matrix_operation(matrix_Q, size, size, matrix_H, size, size, matrix_Q_prime, MM_MULTIPLY_GEMM);
+
+ // print_matrix(matrix_H, size, size);
+
+ for (uint32_t j=0; j<size; j++) memcpy(matrix_Q[j], matrix_Q_prime[j], size * sizeof(float));
+ for (uint32_t j=0; j<size; j++) memcpy(matrix_R[j], matrix_R_prime[j], size * sizeof(float));
+
+ for (uint32_t j=0; j<size; j++) free(matrix_H[j]);
+ free(matrix_H);
+ for (uint32_t j=0; j<(size - i); j++) free(current_house_holder_matrix[j]);
+ free(current_house_holder_matrix);
+ free(current_house_holder_vector);
+ }
+
+ matrix_operation(matrix_R, size, size, matrix_Q, size, size, resultant_matrix, MM_MULTIPLY_GEMM);
+ matrix_operation(matrix_Q, size, size, matrix_R, size, size, resultant_matrix, MM_MULTIPLY_GEMM);
+
+ print_matrix(matrix_R, size, size);
+
+ for (uint32_t i=0; i<size; i++) {
+ free(matrix_R[i]);
+ free(matrix_R_prime[i]);
+ free(matrix_Q_prime[i]);
+ free(matrix_Q[i]);
+ }
+ free(matrix_R_prime);
+ free(matrix_R);
+ free(matrix_Q_prime);
+ free(matrix_Q);
+}
+
+void qr_algorithm(float** input_matrix, uint32_t rows, uint32_t cols, float** resultant_matrix) {
+ float** covariance_matrix = (float**) malloc(cols * sizeof(float*)); // Has feature x feature dimensions
+ for (uint32_t i=0; i<cols; i++) covariance_matrix[i] = (float*) malloc(cols * sizeof(float));
+
+ float** covariance_matrix_prime = (float**) malloc(cols * sizeof(float*)); // Has feature x feature dimensions
+ for (uint32_t i=0; i<cols; i++) covariance_matrix_prime[i] = (float*) malloc(cols * sizeof(float));
+
+ centre_scale(input_matrix, rows, cols, MEAN);
+ covariance(input_matrix, rows, cols, covariance_matrix);
+
+ qr_decomposition(covariance_matrix, cols, covariance_matrix_prime);
+ while (matrix_norm(covariance_matrix_prime, covariance_matrix, cols, cols) > TOLERANCE) {
+ for (uint32_t i=0; i<cols; i++) {
+ for (uint32_t j=0; j<cols; j++) {
+ if (i == j) printf("%.2f ", covariance_matrix[i][j]);
+ }
+ }
+
+ memcpy(covariance_matrix, covariance_matrix_prime, cols);
+ qr_decomposition(covariance_matrix, cols, covariance_matrix_prime);
+
+ for (uint32_t i=0; i<cols; i++) free(covariance_matrix[i]);
+ free(covariance_matrix);
+ }
+}
+
+void power_iteration(float** input_matrix, uint32_t size, float* eigen_vector) {
+ // Initialize first guess vector
+ float* new_vector = (float*) malloc(size * sizeof(float));
+ float* curr_vector = (float*) malloc(size * sizeof(float));
+ for (uint32_t i=0; i<size; i++) curr_vector[i] = (float) rand() / RAND_MAX;
+
+ // MAX_INTERATIONS in case the singular vector doesn't converge
+ for (uint32_t i=0; i<MAX_ITERATIONS; i++) {
+ matrix_vector_multiply(input_matrix, size, size, curr_vector, new_vector, MV_MULTIPLY);
+ vector_normalize(curr_vector, size);
+ vector_normalize(new_vector, size);
+ if (fabs(fabs(cosine_similarity(new_vector, curr_vector, size))-1.0) < TOLERANCE) break;
+ memcpy(curr_vector, new_vector, size * sizeof(float));
+ }
+
+ memcpy(eigen_vector, new_vector, size * sizeof(float));
+ free(curr_vector);
+ free(new_vector);
+}
+
+float eigen_value_compute(float* eigen_vector, float** input_matrix, uint32_t size) {
+ if (vector_norm(eigen_vector, size) == 0) return 0;
+
+ float lambda = 0.0f;
+ for (uint32_t i=0; i<size; i++) {
+ lambda += input_matrix[i][i] * eigen_vector[i] * eigen_vector[i];
+ for (uint32_t j=0; j<size; j++) {
+ if (i != j) {
+ lambda += input_matrix[i][j] * eigen_vector[i] * eigen_vector[j];
+ }
+ }
+ }
+ lambda /= vector_norm(eigen_vector, size) * vector_norm(eigen_vector, size);
+ return lambda;
+}
+
+void subtract_eigen(float* eigen_vector, float** input_matrix, float** resultant_matrix, float eigen_value, uint32_t size) {
+ float** outer_product_matrix = (float**) malloc(size * sizeof(float*));
+ for (uint32_t i=0; i<size; i++) outer_product_matrix[i] = (float*) malloc(size * sizeof(float));
+
+ outer_product(eigen_vector, size, outer_product_matrix);
+
+ for (uint32_t i=0; i<size; i++) {
+ for (uint32_t j=0; j<size; j++) outer_product_matrix[i][j] *= eigen_value;
+ }
+
+ matrix_operation(input_matrix, size, size, outer_product_matrix, size, size, resultant_matrix, MM_SUBTRACT);
+
+ for (uint32_t i=0 ; i<size; i++) free(outer_product_matrix[i]);
+ free(outer_product_matrix);
+}
+
+void pca_covariance_method(float** input_matrix, uint32_t rows, uint32_t cols, float** resultant_matrix, uint32_t components) {
+ float** input_matrix_original = (float**) malloc(rows * sizeof(float*)); // Has sample x feature dimensions
+ for (uint32_t i=0; i<rows; i++) input_matrix_original[i] = (float*) malloc(cols * sizeof(float));
+
+ float* eigen_vector = (float*) malloc(cols * sizeof(float)); // Has feature dimensions
+
+ float** covariance_matrix = (float**) malloc(cols * sizeof(float*)); // Has feature x feature dimensions
+ float** covariance_matrix_prime = (float**) malloc(cols * sizeof(float*));
+ float** projection_matrix = (float**) malloc(cols * sizeof(float*)); // Has features x components dimensions
+
+ for (uint32_t i=0; i<cols; i++) {
+ projection_matrix[i] = (float*) malloc(components * sizeof(float));
+ covariance_matrix[i] = (float*) malloc(cols * sizeof(float));
+ covariance_matrix_prime[i] = (float*) malloc(cols * sizeof(float));
+ }
+
+ copy_matrix(input_matrix, input_matrix_original, rows, cols);
+
+ centre_scale(input_matrix, rows, cols, MEAN);
+ covariance(input_matrix, rows, cols, covariance_matrix);
+
+ for (uint32_t i=0; i<components; i++) {
+ power_iteration(covariance_matrix, cols, eigen_vector);
+ float eigen_value = eigen_value_compute(eigen_vector, covariance_matrix, cols);
+ subtract_eigen(eigen_vector, covariance_matrix, covariance_matrix_prime, eigen_value, cols);
+ for (uint32_t j=0; j<cols; j++) projection_matrix[j][i] = eigen_vector[j];
+
+ copy_matrix(covariance_matrix_prime, covariance_matrix, cols, cols);
+ }
+
+ matrix_operation(input_matrix_original, rows, cols, projection_matrix, cols, components, resultant_matrix, MM_MULTIPLY_GEMM); // Has sample x components dimensions
+
+ free(eigen_vector);
+
+ for (uint32_t i=0; i<cols; i++) {
+ free(projection_matrix[i]);
+ free(covariance_matrix_prime[i]);
+ free(covariance_matrix[i]);
+ }
+
+ for (uint32_t i=0; i<rows; i++) {
+ free(input_matrix_original[i]);
+ }
+
+ free(covariance_matrix_prime);
+ free(covariance_matrix);
+ free(input_matrix_original);
+ free(projection_matrix);
+}