# (removed paste artifact: a stray line-number column "1".."105" and a lone "|",
#  which was a syntax error; the script proper begins below)
import os
import cv2
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
FEATURE_VECTOR_SIZE = 25344  # flattened length of one 144x176 grayscale image

# Folders holding the negative (DNEG) and positive (DPOS) scan images.
dneg_folder_path = "/home/eric/URA/OASIS/Dataset/BMP/DNEG/"
dpos_folder_path = "/home/eric/URA/OASIS/Dataset/BMP/DPOS/"


def _load_grayscale_images(folder_path):
    """Load every regular file in *folder_path* as a grayscale image.

    Returns a list of 2-D numpy arrays (one per image). Subdirectories are
    skipped. NOTE(review): cv2.imread returns None for unreadable files, and
    that None would be appended silently — verify the folders contain only
    valid BMP images.
    """
    images = []
    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)
        if os.path.isfile(file_path):
            # flag 0 == cv2.IMREAD_GRAYSCALE
            images.append(cv2.imread(file_path, 0))
    return images


# Load both classes with the shared helper (previously two copy-pasted loops).
dneg_files = _load_grayscale_images(dneg_folder_path)
dpos_files = _load_grayscale_images(dpos_folder_path)
# Create labels: 0 for the DNEG (negative) class, 1 for DPOS (positive).
dneg_labeled_files = [0] * len(dneg_files)
dpos_labeled_files = [1] * len(dpos_files)

# Concatenate both classes into one dataset with aligned labels.
data = dneg_files + dpos_files
labels = dneg_labeled_files + dpos_labeled_files

# Convert to numpy arrays (data stacks the images into one 3-D array,
# assuming every image has the same dimensions — TODO confirm).
data = np.array(data)
labels = np.array(labels)

# Flatten each image into a 1-D feature vector and standardize features.
# reshape(len(data), -1) infers the feature length from the images instead
# of hard-coding FEATURE_VECTOR_SIZE; identical result for 144x176 inputs,
# but it also works unchanged for other image sizes.
data_flatten = data.reshape(len(data), -1)
scaler = StandardScaler()
data_flatten = scaler.fit_transform(data_flatten)

# Reduce dimensionality with PCA.
# NOTE(review): the scaler and PCA are fit on the FULL dataset before the
# train/test split performed later, so information from the eventual test
# samples leaks into preprocessing. For an unbiased accuracy estimate, fit
# the scaler and PCA on the training split only.
pca = PCA(n_components=115)
pca.fit(data_flatten)
data_flatten = pca.transform(data_flatten)
# print(data_flatten)
# PC_values = np.arange(pca.n_components_) + 1
# plt.plot(PC_values, pca.explained_variance_ratio_, 'o-', linewidth=2, color='blue')
# plt.title('Scree Plot')
# plt.xlabel('Principal Component')
# plt.ylabel('Variance Explained')
# plt.show()
# Repeatedly evaluate an SVM on fresh random 80/20 splits of the
# PCA-reduced data to gauge the spread of test accuracy.
for _ in range(50):
    # Hold out a random 20% of the samples for testing.
    data_training, data_testing, labels_training, labels_testing = train_test_split(
        data_flatten, labels, test_size=0.2
    )
    # Fit an SVC with default hyperparameters, then report them and the
    # accuracy on the held-out split.
    classifier = svm.SVC()
    classifier.fit(data_training, labels_training)
    print("C value: ", classifier.get_params()['C'])
    print("Gamma value: ", classifier.get_params()['gamma'])
    print(classifier.score(data_testing, labels_testing))
# Plot data
# plt.scatter(data_flatten[:, 0], data_flatten[:, 1], c=labels, cmap='viridis')
# plt.show()
# accuracies = []
# for num_components in range(2, 234):
# # Flatten and scale data
# data_flatten = data.reshape(len(data), FEATURE_VECTOR_SIZE)
# scaler = StandardScaler()
# data_flatten = scaler.fit_transform(data_flatten)
# # Reduce dimensions
# pca = PCA(n_components=num_components)
# pca.fit(data_flatten)
# data_flatten_pca = pca.transform(data_flatten)
# # Calculate accuracy 10 times
# accuracy = 0
# for i in range(100):
# # Split data into training and testing sets
# data_training, data_testing, labels_training, labels_testing = train_test_split(data_flatten_pca, labels, test_size = 0.2)
# # Train and evaluate SVM
# clf = svm.SVC()
# clf.fit(data_training, labels_training)
# accuracy += clf.score(data_testing, labels_testing)
# accuracy /= 100
# accuracies.append(accuracy)
# # Plot results
# plt.plot(range(2, 234), accuracies)
# plt.title('Number of PCA Components vs. SVM Accuracy')
# plt.xlabel('Number of PCA Components')
# plt.ylabel('Accuracy')
# plt.show()
# (removed paste artifact: trailing "|", a syntax error)