refactored tool chain setup, able to debug and run from vscode nowHEAD master

author: Eric Dao <eric@erickhangdao.com> 2023-05-12 14:09:07 -0400
committer: Eric Dao <eric@erickhangdao.com> 2023-05-12 14:09:07 -0400
commit: 191e4bd8beae134295d481773823142d2fdc6a98 (patch)
tree: aaca4e7f8e229f9bd551d9bb1583b8ecd7a59d4a /OASIS/C/src/ekd_svm.c
parent: 186daed7e179241377c117e9d208ccd301d4d712 (diff)
download: ura-master.tar.gz
ura-master.tar.bz2
ura-master.zip
1 files changed, 0 insertions, 259 deletions
diff --git a/OASIS/C/src/ekd_svm.c b/OASIS/C/src/ekd_svm.c
deleted file mode 100644
index 13c155d..0000000
--- a/OASIS/C/src/ekd_svm.c
+++ /dev/null
@@ -1,259 +0,0 @@
-#include "ekd_svm.h"
-
-void svm_preprocess(float** input_matrix, uint32_t rows, uint32_t cols) {
-    centre_scale(input_matrix, rows, cols, MEAN_UNIT_VARIANCE);
-}
-
-float rbf_kernel(float* sample_1, float* sample_2, uint32_t features, float gamma) {
-    float distance = 0.0f;
-    for (uint32_t i=0; i<features; i++) distance += pow(sample_1[i] - sample_2[i], 2);
-    return exp(-gamma * distance);
-}
-
-void kernel_matrix_transform(float** input_matrix, float** resultant_matrix, uint32_t samples, uint32_t features, float gamma) {
-    for (uint32_t i=0; i<samples; i++) {
-        for (uint32_t j=0; j<samples; j++) {
-            float kernel = rbf_kernel(input_matrix[i], input_matrix[j], features, gamma);
-            resultant_matrix[i][j] = kernel;
-        }
-    }
-}
-
-void kernel_row_transform(float** input_matrix, float* query_row, float* resultant_row, uint32_t samples, uint32_t features, float gamma) {
-    for (uint32_t i=0; i<samples; i++) {
-        float kernel = rbf_kernel(input_matrix[i], query_row, features, gamma);
-        resultant_row[i] = kernel;
-    }
-}
-
-float get_error(uint32_t index, float* alphas, float** kernel_matrix, int32_t* labels, uint32_t samples) {
-    float error = 0.0f;
-
-    for (uint32_t i=0; i<samples; i++) error += alphas[i] * labels[i] * kernel_matrix[i][index];
-
-    error -= labels[index];
-    return error;
-}
-
-uint32_t find_second_alpha(uint32_t first_index, float first_error_val, float* alphas, float** kernel_matrix, int32_t* labels, uint32_t samples) {
-    int32_t second_index = -1;
-    float max_error_diff = 0;
-
-    for (uint32_t i=0; i<samples; i++) {
-        if (i == first_index) continue;
-
-        float second_error_val = get_error(i, alphas, kernel_matrix, labels, samples);
-        float error_diff = fabs(first_error_val - second_error_val);
-        if (error_diff > max_error_diff) {
-            max_error_diff = error_diff;
-            second_index = i;
-        }
-    }
-    return second_index;
-}
-
-void calculate_bounds(float first_label, float second_label, float first_alpha, float second_alpha, float penalty, float* lower_bound, float* upper_bound) {
-    if (first_label != second_label) {
-        *lower_bound = fmax(0, second_alpha - first_alpha);
-        *upper_bound = fmin(penalty, penalty + second_alpha - first_alpha);
-    } else {
-        *lower_bound = fmax(0, second_alpha + first_alpha - penalty);
-        *upper_bound = fmin(penalty, second_alpha + first_alpha);
-    }
-}
-
-float clip_second_alpha_new(float second_alpha_new, float lower_bound, float upper_bound) {
-    if (second_alpha_new < lower_bound) {
-        return lower_bound;
-    } else if (second_alpha_new > upper_bound) {
-        return upper_bound;
-    } else {
-        return second_alpha_new;
-    }
-}
-
-void update_alphas(float first_alpha_new, float second_alpha_new, float* alphas, uint32_t first_index, uint32_t second_index) {
-    alphas[first_index] = first_alpha_new;
-    alphas[second_index] = second_alpha_new;
-}
-
-float calculate_bias(float first_error_val, float second_error_val, float first_alpha_new, float first_alpha, float second_alpha_new, float second_alpha, float kernel_val_ii, float kernel_val_ij, float kernel_val_jj, float first_label, float second_label, float penalty) {
-    float first_bias = first_error_val + first_label * (first_alpha_new - first_alpha) * kernel_val_ii + second_label * (second_alpha_new - second_alpha) * kernel_val_ij;
-    float second_bias = first_error_val + first_label * (first_alpha_new - first_alpha) * kernel_val_ij + second_label * (second_alpha_new - second_alpha) * kernel_val_jj;
-    float bias;
-    if (first_alpha_new > 0 && first_alpha_new < penalty) {
-        bias = first_bias;
-    } else if (second_alpha_new > 0 && second_alpha_new < penalty) {
-        bias = second_bias;
-    } else {
-        bias = (first_bias + second_bias) / 2;
-    }
-    return bias;
-}
-
-void update_errors(float* errors, uint32_t samples, uint32_t first_index, uint32_t second_index, float first_label, float second_label, float first_alpha_new, float first_alpha, float second_alpha_new, float second_alpha, float* bias, float** kernel_matrix) {
-    float bias_temp = *bias;
-    for (uint32_t i=0; i<samples; i++) {
-        if (i == first_index || i == second_index) continue;
-        float error = errors[i];
-        error += first_label * (first_alpha_new - first_alpha) * kernel_matrix[first_index][i];
-        error -= bias_temp - *bias;
-        errors[i] = error;
-    }
-}
-
-uint8_t optimize_alphas(uint32_t first_index, uint32_t second_index, float* alphas, float* bias, float* errors, float** kernel_matrix, int32_t* labels, uint32_t samples, float penalty) {
-    if (first_index == second_index) return 0;
-
-    float first_label = (float) labels[first_index];
-    float second_label = (float) labels[second_index];
-
-    float first_alpha = alphas[first_index];
-    float second_alpha = alphas[second_index];
-
-    float first_error_val = errors[first_index];
-    float second_error_val = errors[second_index];
-
-    float kernel_val_ii = kernel_matrix[first_index][first_index];
-    float kernel_val_jj = kernel_matrix[second_index][second_index];
-    float kernel_val_ij = kernel_matrix[first_index][second_index];
-
-    // Second derivative of objective function
-    float learning_rate = 2 * kernel_val_ij - kernel_val_ii - kernel_val_jj;
-    if (learning_rate >= 0) return 0;
-
-    float second_alpha_new = second_alpha - (second_label * (first_error_val - second_error_val) / learning_rate);
-
-    float lower_bound, upper_bound;
-    calculate_bounds(first_label, second_label, first_alpha, second_alpha, penalty, &lower_bound, &upper_bound);
-    second_alpha_new = clip_second_alpha_new(second_alpha_new, lower_bound, upper_bound);
-
-    if (fabs(second_alpha_new - second_alpha) < SVM_TOLERANCE * (second_alpha_new + second_alpha + SVM_TOLERANCE)) return 0;
-    
-    float first_alpha_new = first_alpha + first_label * second_label * (second_alpha - second_alpha_new);
-
-    update_alphas(first_alpha_new, second_alpha_new, alphas, first_index, second_index);
-
-    float bias_new = calculate_bias(first_error_val, second_error_val, first_alpha_new, first_alpha, second_alpha_new, second_alpha, kernel_val_ii, kernel_val_ij, kernel_val_jj, first_label, second_label, penalty);
-    *bias = bias_new;
-
-    update_errors(errors, samples, first_index, second_index, first_label, second_label, first_alpha_new, first_alpha, second_alpha_new, second_alpha, bias, kernel_matrix);
-
-    return 1;
-}
-
-uint8_t verify_kkt(uint32_t first_index, float tolerance, float* alphas, float* bias, float* errors, float** kernel_matrix, int32_t* labels, uint32_t samples, float penalty) {
-    float first_label = (float) labels[first_index];
-    float first_alpha =  alphas[first_index];
-    float first_error_val = errors[first_index];
-    float first_residual_error = first_error_val * first_label;
-    
-    if ((first_residual_error < -tolerance && first_alpha < penalty) || (first_residual_error > tolerance && first_alpha > 0)) {
-        // Alpha is able be optimized, now find second alpha using heuristic method
-        uint32_t second_index = find_second_alpha(first_index, first_error_val, alphas, kernel_matrix, labels, samples);
-        // If the alphas have been optimized, return
-        if (optimize_alphas(first_index, second_index, alphas, bias, errors, kernel_matrix, labels, samples, penalty)) return 1;
-
-        // Could not find second alpha or couldn't optimize alphas values
-        uint32_t max_index = -1;
-        float max_error = -FLT_MAX;
-        for (uint32_t i=0; i<samples; i++) {
-            if (alphas[i] > 0 && alphas[i] < penalty) {
-                float error = errors[i];
-                if (fabs(error - first_error_val) > max_error) {
-                    max_index = i;
-                    max_error = fabs(error - first_error_val);
-                }
-            }
-        }
-
-        if (max_index != -1 && optimize_alphas(first_index, max_index, alphas, bias, errors, kernel_matrix, labels, samples, penalty) == 1) return 1;
-        
-        // Still couldn't optimize alpha values:w
-        return 0;
-    }
-
-    // Alpha values do not need to be optimized
-    return 0;
-}
-
-void train_svm(float** data_matrix, uint32_t samples, uint32_t features, float* alphas, float* bias, int32_t* labels, float penalty) {
-    // Centre and scale reduced_dimension_matrix
-    svm_preprocess(data_matrix, samples, features);
-
-    float** kernel_matrix = (float**) malloc(samples * sizeof(float*));
-    for (uint32_t i=0; i<samples; i++) kernel_matrix[i] = (float*) malloc(samples * sizeof(float));
-
-    // Compute kernel matrix using RBF
-    kernel_matrix_transform(data_matrix, kernel_matrix, samples, features, GAMMA);
-
-    // Initialize bias to 0, alphas/errors to random numbers between -1 and 1
-    *bias = 0;
-
-    float* errors = (float*) malloc(samples * sizeof(float));
-    for (uint32_t i=0; i<samples; i++) {
-        errors[i] = (float) rand() / (float) RAND_MAX * 2 - 1;
-        alphas[i] = (float) rand() / (float) RAND_MAX * 2 - 1;
-    }
-
-    uint32_t num_changed_alphas = 0;
-    uint32_t search_passes = 0;
-    uint8_t searched_all_flag = 1;
-
-    while ((search_passes < MAX_SVM_PASSES && num_changed_alphas > 0) || searched_all_flag) {
-        num_changed_alphas = 0;
-
-        if (searched_all_flag) {
-            // Loops over all samples
-            for (uint32_t sample_index=0; sample_index<samples; sample_index++) 
-                num_changed_alphas += verify_kkt(sample_index, SVM_TOLERANCE, alphas, bias, errors, kernel_matrix, labels, samples, penalty);
-        } else {
-            // Loops over non support vector samples (not 0 and not C -> penalty parameter)
-            for (uint32_t sample_index=0; sample_index<samples; sample_index++) {
-                if (alphas[sample_index] > 0 && alphas[sample_index] < penalty)
-                    num_changed_alphas += verify_kkt(sample_index, SVM_TOLERANCE, alphas, bias, errors, kernel_matrix, labels, samples, penalty);
-            }
-        }
-
-        if (searched_all_flag) {
-            searched_all_flag = 0;
-        } else if (num_changed_alphas == 0) {
-            searched_all_flag = 1;
-        }
-        search_passes++;
-    }
-
-    for (uint32_t i=0; i<samples; i++) free (kernel_matrix[i]);
-    free(kernel_matrix);
-    free(errors);
-}
-
-int32_t svm_predict(float** test_data, int32_t* labels_test, uint32_t num_test, uint32_t num_components, float* alphas, float bias, float gamma) {
-    int num_correct = 0;
-    for (int i=0; i<num_test; i++) {
-        float transformed_row[num_test];
-        kernel_row_transform(test_data, test_data[i], transformed_row, num_test, num_components, gamma);
-        float prediction = bias;
-        for (int j = 0; j<num_test; j++) {
-            prediction += alphas[j] * transformed_row[j];
-        }
-        if (prediction >= 0 && labels_test[i] == 1) {
-            num_correct++;
-        } else if (prediction < 0 && labels_test[i] == -1) {
-            num_correct++;
-        }
-    }
-    return num_correct;
-}
-
-float test_svm(float** data_test, int32_t* labels_test, uint32_t num_test, uint32_t num_components, float* alphas, float bias) {
-    uint32_t num_correct = 0;
-    float* prediction = (float*) malloc(num_test * sizeof(float));
-    for (uint8_t i=0; i<num_test; i++) {
-        prediction[i] = svm_predict(data_test, labels_test, num_test, num_components, alphas, bias, GAMMA);
-        if (prediction[i] * labels_test[i] > 0) num_correct++;
-    }
-    float accuracy = ((float) num_correct / num_test) * 100;
-    free(prediction);
-    return accuracy;
-}
-\ No newline at end of file
author	Eric Dao <eric@erickhangdao.com>	2023-05-12 14:09:07 -0400
committer	Eric Dao <eric@erickhangdao.com>	2023-05-12 14:09:07 -0400
commit	191e4bd8beae134295d481773823142d2fdc6a98 (patch)
tree	aaca4e7f8e229f9bd551d9bb1583b8ecd7a59d4a /OASIS/C/src/ekd_svm.c
parent	186daed7e179241377c117e9d208ccd301d4d712 (diff)
download	ura-master.tar.gz ura-master.tar.bz2 ura-master.zip