import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

FEATURE_VECTOR_SIZE = 25344  # 144 x 176 pixels per image

dneg_folder_path = "/home/eric/URA/OASIS/Dataset/BMP/DNEG/"
dpos_folder_path = "/home/eric/URA/OASIS/Dataset/BMP/DPOS/"

dneg_files = []
dpos_files = []

# Load the negative-class images as grayscale, skipping anything
# OpenCV cannot decode (imread returns None on failure)
for file in os.listdir(dneg_folder_path):
    dneg_file_path = os.path.join(dneg_folder_path, file)
    if os.path.isfile(dneg_file_path):
        dneg_img = cv2.imread(dneg_file_path, cv2.IMREAD_GRAYSCALE)
        if dneg_img is not None:
            dneg_files.append(dneg_img)

# Load the positive-class images the same way
for file in os.listdir(dpos_folder_path):
    dpos_file_path = os.path.join(dpos_folder_path, file)
    if os.path.isfile(dpos_file_path):
        dpos_img = cv2.imread(dpos_file_path, cv2.IMREAD_GRAYSCALE)
        if dpos_img is not None:
            dpos_files.append(dpos_img)

# Create labels: 0 = negative, 1 = positive
dneg_labeled_files = [0] * len(dneg_files)
dpos_labeled_files = [1] * len(dpos_files)

# Concatenate the image lists and label lists
data = dneg_files + dpos_files
labels = dneg_labeled_files + dpos_labeled_files

# Convert lists to numpy arrays
data = np.array(data)
labels = np.array(labels)

# Flatten each image into a 25344-dimensional vector and standardize
data_flatten = data.reshape(len(data), FEATURE_VECTOR_SIZE)
scaler = StandardScaler()
data_flatten = scaler.fit_transform(data_flatten)

# Reduce dimensionality with PCA
pca = PCA(n_components=115)
pca.fit(data_flatten)
data_flatten = pca.transform(data_flatten)

# print(data_flatten)

# PC_values = np.arange(pca.n_components_) + 1
# plt.plot(PC_values, pca.explained_variance_ratio_, 'o-', linewidth=2, color='blue')
# plt.title('Scree Plot')
# plt.xlabel('Principal Component')
# plt.ylabel('Variance Explained')
# plt.show()

# Train and score an SVM on 50 independent random train/test splits
for _ in range(50):
    # Split data into training and testing sets
    data_training, data_testing, labels_training, labels_testing = train_test_split(
        data_flatten, labels, test_size=0.2)

    # Train and evaluate SVM (default RBF kernel, default C and gamma)
    clf = svm.SVC()
    clf.fit(data_training, labels_training)
    print("C value: ", clf.get_params()['C'])
    print("Gamma value: ", clf.get_params()['gamma'])
    print(clf.score(data_testing, labels_testing))

# Plot data projected onto the first two principal components
# plt.scatter(data_flatten[:, 0], data_flatten[:, 1], c=labels, cmap='viridis')
# plt.show()

# accuracies = []
# for num_components in range(2, 234):
#     # Flatten and scale data
#     data_flatten = data.reshape(len(data), FEATURE_VECTOR_SIZE)
#     scaler = StandardScaler()
#     data_flatten = scaler.fit_transform(data_flatten)
#
#     # Reduce dimensions
#     pca = PCA(n_components=num_components)
#     pca.fit(data_flatten)
#     data_flatten_pca = pca.transform(data_flatten)
#
#     # Average accuracy over 100 random splits
#     accuracy = 0
#     for i in range(100):
#         # Split data into training and testing sets
#         data_training, data_testing, labels_training, labels_testing = train_test_split(
#             data_flatten_pca, labels, test_size=0.2)
#
#         # Train and evaluate SVM
#         clf = svm.SVC()
#         clf.fit(data_training, labels_training)
#         accuracy += clf.score(data_testing, labels_testing)
#     accuracy /= 100
#     accuracies.append(accuracy)
#
# # Plot results
# plt.plot(range(2, 234), accuracies)
# plt.title('Number of PCA Components vs. SVM Accuracy')
# plt.xlabel('Number of PCA Components')
# plt.ylabel('Accuracy')
# plt.show()
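
# ---------------------------------------------------------------------------
# A minimal sketch added for illustration, not part of the original
# experiment. The loop above fits StandardScaler and PCA on the full dataset
# before splitting, which leaks test-set statistics into the preprocessing,
# and it only ever reports the default C and gamma of SVC. Wrapping the whole
# chain in a scikit-learn Pipeline lets cross-validation refit the
# preprocessing on each training fold, and GridSearchCV can tune C and gamma
# at the same time. The grid values below are illustrative assumptions, not
# values tuned for this dataset.
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.pipeline import make_pipeline

raw_features = data.reshape(len(data), FEATURE_VECTOR_SIZE)
pipeline = make_pipeline(StandardScaler(), PCA(n_components=115), svm.SVC())

# Leakage-free accuracy estimate with the default hyperparameters
scores = cross_val_score(pipeline, raw_features, labels, cv=5)
print("Mean 5-fold CV accuracy:", scores.mean())

# Joint search over C and gamma; the step name 'svc' is assigned by
# make_pipeline from the lowercased class name
param_grid = {"svc__C": [0.1, 1, 10, 100], "svc__gamma": ["scale", 1e-3, 1e-4]}
search = GridSearchCV(pipeline, param_grid, cv=5)
search.fit(raw_features, labels)
print("Best parameters:", search.best_params_)
print("Best CV accuracy:", search.best_score_)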