#https://youtu.be/uWTzkUD3V9g
__author__ = "Sreenivas Bhattiprolu"
"""
Gabor and traditional filters for feature generation and
Random Forest, SVM for classification.
"""
import numpy as np
import cv2
import pandas as pd
img = cv2.imread('graos-de-cafe/geral.jpeg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#Here, if you have a multichannel image, extract the relevant channel instead of converting the image to grey.
#For example, if DAPI contains the nuclei information, extract the DAPI channel image first.
#Multiple images can be used for training. For that, concatenate the data from each image (see the sketch after the first feature column below).
#Save original image pixels into a data frame. This is our Feature #1.
img2 = img.reshape(-1)
df = pd.DataFrame()
df['Original Image'] = img2
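#A minimal sketch of concatenating several training images, assuming a
#hypothetical folder 'train_images/' of images processed the same way.
#Each image's feature columns would be built exactly as below, then the rows stacked:
# import glob
# frames = []
# for path in glob.glob('train_images/*.jpeg'):   #hypothetical path
#     im = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY)
#     frames.append(pd.DataFrame({'Original Image': im.reshape(-1)}))
# df = pd.concat(frames, ignore_index=True)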
#Generate Gabor features
num = 1  #Counter used to give each Gabor feature a label in the data frame
kernels = []
for theta in range(2):  #Define number of thetas
    theta = theta / 4. * np.pi
    for sigma in (1, 3):  #Sigma with 1 and 3
        for lamda in np.arange(0, np.pi, np.pi / 4):  #Range of wavelengths
            for gamma in (0.05, 0.5):  #Gamma values of 0.05 and 0.5
                gabor_label = 'Gabor' + str(num)  #Label Gabor columns as Gabor1, Gabor2, etc.
#                print(gabor_label)
                ksize = 9
                kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                kernels.append(kernel)
                #Now filter the image and add values to a new column
                fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
                filtered_img = fimg.reshape(-1)
                df[gabor_label] = filtered_img  #Labels columns as Gabor1, Gabor2, etc.
                print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
                num += 1  #Increment for gabor column label
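#Optional sketch: visualize the first few Gabor kernels to sanity-check the
#filter bank (display only; assumes matplotlib is available):
# from matplotlib import pyplot as plt
# for i, k in enumerate(kernels[:8]):
#     plt.subplot(2, 4, i + 1)
#     plt.imshow(k, cmap='gray')
#     plt.axis('off')
# plt.show()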
########################################
#Generate OTHER FEATURES and add them to the data frame
#CANNY EDGE
edges = cv2.Canny(img, 100, 200)  #Image, min and max hysteresis thresholds
edges1 = edges.reshape(-1)
df['Canny Edge'] = edges1 #Add column to original dataframe
from skimage.filters import roberts, sobel, scharr, prewitt
#ROBERTS EDGE
edge_roberts = roberts(img)
edge_roberts1 = edge_roberts.reshape(-1)
df['Roberts'] = edge_roberts1
#SOBEL
edge_sobel = sobel(img)
edge_sobel1 = edge_sobel.reshape(-1)
df['Sobel'] = edge_sobel1
#SCHARR
edge_scharr = scharr(img)
edge_scharr1 = edge_scharr.reshape(-1)
df['Scharr'] = edge_scharr1
#PREWITT
edge_prewitt = prewitt(img)
edge_prewitt1 = edge_prewitt.reshape(-1)
df['Prewitt'] = edge_prewitt1
#GAUSSIAN with sigma=3
from scipy import ndimage as nd
gaussian_img = nd.gaussian_filter(img, sigma=3)
gaussian_img1 = gaussian_img.reshape(-1)
df['Gaussian s3'] = gaussian_img1
#GAUSSIAN with sigma=7
gaussian_img2 = nd.gaussian_filter(img, sigma=7)
gaussian_img3 = gaussian_img2.reshape(-1)
df['Gaussian s7'] = gaussian_img3
#MEDIAN with size=3
median_img = nd.median_filter(img, size=3)
median_img1 = median_img.reshape(-1)
df['Median s3'] = median_img1
#VARIANCE with size=3
#variance_img = nd.generic_filter(img, np.var, size=3)
#variance_img1 = variance_img.reshape(-1)
#df['Variance s3'] = variance_img1 #Add column to original dataframe
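#generic_filter with np.var is very slow on large images. A faster equivalent
#sketch (local variance = local mean of squares minus square of local mean):
# img_f = img.astype(np.float32)
# mean = nd.uniform_filter(img_f, size=3)
# sq_mean = nd.uniform_filter(img_f**2, size=3)
# df['Variance s3'] = (sq_mean - mean**2).reshape(-1)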
######################################
#Now, add a column in the data frame for the Labels
#For this, we need to import the labeled image
labeled_img = cv2.imread('mascaras-binarias/geral_casca_choco_pedra_preto-verde-ardido_quebradu_background-maior.ome.tiff')
#Remember that you can load an image with partial labels,
#but then drop the rows with unlabeled data (done below).
labeled_img = cv2.cvtColor(labeled_img, cv2.COLOR_BGR2GRAY)
labeled_img1 = labeled_img.reshape(-1)
df['Labels'] = labeled_img1
print(df.head())
original_img_data = df.drop(labels = ["Labels"], axis=1) #Use for prediction
#df.to_csv("Gabor.csv")
df = df[df.Labels != 0]  #Drop unlabeled pixels (label 0)
#########################################################
#Define the dependent variable that needs to be predicted (labels)
Y = df["Labels"].values
#Encode Y values to 0, 1, 2, 3, ... (not necessary, but it makes it easier to use other tools like ROC plots)
from sklearn.preprocessing import LabelEncoder
Y = LabelEncoder().fit_transform(Y)
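#If the original grey-value labels are needed later, keep a reference to the
#encoder instead (sketch):
# le = LabelEncoder()
# Y = le.fit_transform(Y)
# #...later: original_labels = le.inverse_transform(predictions)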
#Define the independent variables
X = df.drop(labels = ["Labels"], axis=1)
#Split data into train and test to verify accuracy after fitting the model.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=20)
# Import the model we are using
#RandomForestRegressor is for regression-type problems.
#For classification we use RandomForestClassifier.
#Both work similarly, except that the regressor predicts a float
#and the classifier predicts an integer class label.
from sklearn.ensemble import RandomForestClassifier
# Instantiate model with n number of decision trees
model = RandomForestClassifier(n_estimators = 20, random_state = 42)
# Train the model on training data
model.fit(X_train, y_train)
# Get numerical feature importances
#importances = list(model.feature_importances_)
#Let us print them into a nice format.
feature_list = list(X.columns)
feature_imp = pd.Series(model.feature_importances_, index=feature_list).sort_values(ascending=False)
print(feature_imp)
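#Optional sketch: plot the importances as a horizontal bar chart:
# from matplotlib import pyplot as plt
# feature_imp.plot(kind='barh', figsize=(8, 10))
# plt.show()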
###
#SVM
# Train the Linear SVM to compare against Random Forest
#SVM will be slower than Random Forest.
#Make sure to comment out the feature-importance lines above, as they do not apply to SVM.
from sklearn.svm import LinearSVC
model_SVM = LinearSVC(max_iter=1200)
model_SVM.fit(X_train, y_train)
#Logistic regression
#from sklearn.linear_model import LogisticRegression
#model_LR = LogisticRegression(max_iter=100).fit(X_train, y_train)
# prediction_test_LR = model_LR.predict(X_test)
# Verify the number of trees used (if not defined explicitly above).
#print('Number of Trees used : ', model.n_estimators)
#STEP 8: TESTING THE MODEL BY PREDICTING ON TEST DATA
#AND CALCULATE THE ACCURACY SCORE
#Test prediction on testing data.
prediction_RF = model.predict(X_test)
prediction_SVM = model_SVM.predict(X_test)
#prediction_LR = model_LR.predict(X_test)
#.predict takes the .predict_proba output and maps probabilities below a
#certain threshold (usually 0.5) to 0 and those above it to 1.
#In this example we have multiple labels, so the probabilities for each
#label are stored separately (one column per class).
#
#prediction_prob_test = model.predict_proba(X_test)
#Let us check the accuracy on test data
from sklearn import metrics
#Print the prediction accuracy
#Check accuracy on the test dataset. If it is much lower than the training accuracy, the model is overfitting the training data.
print ("Accuracy using Random Forest= ", metrics.accuracy_score(y_test,
prediction_RF))
print ("Accuracy using SVM = ", metrics.accuracy_score(y_test, prediction_SVM))
#print ("Accuracy using LR = ", metrics.accuracy_score(y_test, prediction_LR))
#https://www.scikit-yb.org/en/latest/api/classifier/rocauc.html
from yellowbrick.classifier import ROCAUC
print("Classes in the image are: ", np.unique(Y))
#ROC curve for RF
roc_auc=ROCAUC(model, classes=[0, 1, 2, 3, 4, 5]) #Create object
roc_auc.fit(X_train, y_train)
roc_auc.score(X_test, y_test)
roc_auc.show()
#ROC curve for SVM
roc_auc=ROCAUC(model_SVM, classes=[0, 1, 2, 3, 4, 5]) #Create object
roc_auc.fit(X_train, y_train)
roc_auc.score(X_test, y_test)
roc_auc.show()
#ROC curve for LR
#roc_auc=ROCAUC(model_LR, classes=[0, 1, 2, 3]) #Create object
#roc_auc.fit(X_train, y_train)
#roc_auc.score(X_test, y_test)
#roc_auc.show()
############################################
#FOR RANDOM FOREST
#############################################
#MAKE PREDICTION
#You can store the model for future use. In fact, this is how machine learning is done in practice:
#train on training images, validate on test images, and deploy the model on unknown images.
import pickle
#Save the trained model to disk as a pickle file for future use
filename = "modelos/geral-grande-mil-model"
pickle.dump(model, open(filename, 'wb'))
#To test the model on future datasets
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.predict(original_img_data)
segmented = result.reshape(img.shape)
from matplotlib import pyplot as plt
plt.imshow(segmented, cmap='jet')
plt.show()
#plt.imsave('segmented_rock_RF_100_estim.jpg', segmented, cmap='jet')
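#To deploy on a truly unknown image, repeat the same feature-generation steps
#above for that image before predicting. Sketch (feature_extraction is a
#hypothetical helper that rebuilds the exact Gabor/edge/smoothing columns):
# new_img = cv2.cvtColor(cv2.imread('new_image.jpeg'), cv2.COLOR_BGR2GRAY)
# new_X = feature_extraction(new_img)   #hypothetical helper
# new_segmented = loaded_model.predict(new_X).reshape(new_img.shape)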