% Developed by ASA Salgadoe (asalgado@myune.edu.au, surantha_a@yahoo.com)
% on 29/10/2019
% new test of choosing wavelengths that can classify the healthy and control groups
%  PLSR (dimension reduction) + LDA or QDA for classification
% ############################################


% ----- Load and tidy the Pc-infected dataset -----
load('allDAI.mat');
% col1: week (DAI code), col2: trt, col3 onwards: spectral bands
wls=allDAI(1,3:end);        % wavelength header row
dataSet=allDAI(2:end,:);    % observation rows
cat=dataSet(:,1:2);         % week + treatment labels

% remove first few spectra (for noise removal)
% subset the wavelengths from 400 nm onwards
% FIX: use >= with find(...,1) so this still works when 400 is not an
% exact grid value (exact == on floats returned empty in that case)
loc400=find(wls>=400,1);
wls=wls(:,loc400:end);

% All sampled data: col1-week, col2-group, col3 onwards-bands
% RAW spectra
spec=dataSet(:,3:end);
spec=spec(:,loc400:end);
dataClean=[cat,spec];
dataClean=sortrows(dataClean,1);  % sort observations by week

% % % 1st method: analyse as healthy vs diseased groups (all weeks pooled)
% % % 2nd method: analyse as healthy vs diseased per each week

% % for 1st method
% grp_trt=dataClean(:,2:end);
% grp_trt=sortrows(grp_trt,1);

% for 2nd method
% DAI codes 1,2,3,4,5,6,7
weekSelected=7;  % week (DAI code) to analyse
% BUG FIX: the row mask must be computed on dataClean, not dataSet —
% dataClean was re-ordered by sortrows above, so row indices taken from
% the unsorted dataSet no longer pick the intended observations.
grp_trt=dataClean(dataClean(:,1)==weekSelected,2:end);
grp_trt=sortrows(grp_trt,1);

% % % % % for water stressed#############
% % % % load('glassH_WS_26_9_2017_s3.mat')
% % % % dataWS=glassH_WS_26_9_2017_s3;
% % % % wls=dataWS(1,3:end);
% % % % dataSetW=dataWS(2:end,:);
% % % % catW=dataSetW(:,2);
% % % % 
% % % % % remove first few spectra (for noise removal)
% % % % % subset the wl 400 onwards
% % % % loc400=find(wls==400);
% % % % wls=wls(:,loc400:end);
% % % % 
% % % % % All sampled data col1-treeId, col2-group, col3 onwards-bands
% % % % % RAW spectra
% % % % specW=dataSetW(:,3:end);
% % % % specW=specW(:,loc400:end);
% % % % dataCleanW=[catW,specW];
% % % % dataCleanW=sortrows(dataCleanW,1);

% % % % grp_trt=dataCleanW;
% % % % grp_trt=sortrows(grp_trt,1);

% #####################################



% assignment of X and Y for the regression/classification steps below
x=grp_trt(:,2:end); %spectra (one band per column)
y=grp_trt(:,1);%categories (treatment group label)




% **********************************************
% Normalization of each spectrum::::::::::::::::(if required)
% Euclidean norm function
% Ref>>>folder7.3/paper28
% % spec_N=spec; %normalized
% % for x=1:length(spec_N(:,1))
% % spec_N(x,:)=normSpec(spec_N(x,:));
% % end
% % 
% % spectra1=[detailsCol1,spec_N];

% **********************************************

%################# performing PLSR  #####################
% Fit PLS regression with 10-fold cross-validation over the full
% candidate component range.
componentRange=10;
[Xloadings,Yloadings,Xscores,Yscores,betaPLS10,PLSPctVar,PLSmsep,stats] = plsregress(x,y,componentRange,'CV',10);

% ---------(optional) view the loadings/scores of the best component
% plot the variances
% PLSPctVar: 1st row for X, 2nd row for Y variables
% FIX: axis driven by componentRange instead of hard-coded 10
figure,plot(1:componentRange,cumsum(100*PLSPctVar(1,:)),'-bo');
xlabel('Number of PLS components');
ylabel('Percent Variance Explained in X')


% Selecting best correct num of components:
% cross-validation results — lowest MSPE
% FIX: for component selection plot the RESPONSE prediction error
% (row 2 of PLSmsep); row 1 is the error for the predictors X.
% Column 1 corresponds to 0 components, hence the 0:componentRange axis.
figure,plot(0:componentRange,PLSmsep(2,:));
xlabel('Number of components');
ylabel('Estimated Mean Squared Prediction Error');
%
%####(do after classification) refit with the selected component count######
cSelected=3;
[XloadingsC,YloadingsC,XscoresC,YscoresC,betaPLS10C,PLSPctVarC,PLSmsepC,statsC] = plsregress(x,y,cSelected,'CV',10);

% regression coefficients for the selected number of components
% (betaPLS10C(1) is the intercept, hence 2:end to align with wls)
figure,plot(wls,betaPLS10C(2:end),'b-o');
xlabel('WaveLength (nm)');
ylabel('Coefficients');

%########## Partitioning data into training and test sets
% after PLSR the categories are the same but the predictors are now
% the PLS X-scores (one column per component)
categories=y;
components=Xscores;
% components=x;

dataGroup = [categories,components]; 
% Hold-out split (train: 70%, test: 30%)
% BUG FIX: cvpartition(n,'HoldOut',p) with a scalar n is NOT stratified;
% pass the class labels so the split preserves class proportions.
cv = cvpartition(categories,'HoldOut',0.3);
idx = cv.test;
% Separate into training and test data
dataTrain = dataGroup(~idx,:);
dataTest  = dataGroup(idx,:);

size(dataTrain)
size(dataTest)

% % storing the partitioned data for future use**************************
% DAI7_train=dataTrain;
% DAI7_test=dataTest;
% *******************************************


% ##############Section 2 - after data partitioning#################
% this part can be run standalone with the stored test and train datasets

% re-loading x and y
% have to load the relevant training and test datasets for the chosen week
load('DAI7_train.mat')
load('DAI7_test.mat')

dataTrain=DAI7_train;
dataTest=DAI7_test; 
componentRange=10;

% the optimum components 

% col1 = category label, col2:end = PLS component scores
yTrain=dataTrain(:,1);
xTrain=dataTrain(:,2:end);

yTest=dataTest(:,1);
xTest=dataTest(:,2:end);
% obtaining a set of Yscores of the test set for future score plots
% NOTE(review): the score plot near the end of the script uses
% YscoresTest — it errors unless this line is uncommented (and
% Yscores/idx from Section 1 are still in the workspace).
% YscoresTest=Yscores(idx,:);
% ################ Training  LDA or QDA models ####################
% CLASSIFICATION model
results=0;  % accumulates one row of test metrics per component count

% for i=1:componentRange    % sweep all candidate component counts
for i=3                     % single component count under test
compN=i; %the number of components fed to the classifier (columns 1:compN)

% for training: scores of the first compN components
train_Sbands=xTrain(:,1:compN);
trainCat=yTrain;

% For testing/new predictions
test_Spec=xTest(:,1:compN); 
%components, make sure you select the same number of components as trained
test_labels=yTest; %categories

% Training models##################### 
% For linear------------------
MdlLinear = fitcdiscr(train_Sbands,trainCat,'DiscrimType','linear');
% For quadratic------------
MdlQuadratic = fitcdiscr(train_Sbands,trainCat,'DiscrimType','quadratic');

% now get the accuracy of training (LDA)
yfit_trainL =MdlLinear.predict(train_Sbands); 
cm_stats_trainL=confusionmatStats(trainCat,yfit_trainL);
cm_stats_trainL.accuracy;
cm_stats_trainL.confusionMat;

% For quadratic-------(QDA)
yfit_train =MdlQuadratic.predict(train_Sbands); 
cm_stats_train=confusionmatStats(trainCat,yfit_train);
cm_stats_train.accuracy;
% BUG FIX: was cm_stats_trainL.confusionMat — the LDA confusion matrix
% was being inspected in the QDA section.
cm_stats_train.confusionMat;

% Testing models##################### 
% prediction-(LDA)
yfit_predictLDA =MdlLinear.predict(test_Spec); 
cm_stats_testLDA=confusionmatStats(test_labels,yfit_predictLDA);
% cm_stats_testLDA.accuracy;

% prediction-(QDA)
yfit_predictQDA =MdlQuadratic.predict(test_Spec); 
cm_stats_testQDA=confusionmatStats(test_labels,yfit_predictQDA);
% cm_stats_testQDA.accuracy;

% Reporting results of classification 
results(i,1)=compN;
results(i,2)=cm_stats_testLDA.accuracy(1,:); %Overall accuracyTest_LDA
results(i,3)=cm_stats_testLDA.sensitivity(2,:);% Dis(grp 2) sensitivity 
results(i,4)=cm_stats_testLDA.specificity(2,:);% Dis(grp 2) specificity

% results(i,4)=cm_stats_testQDA.accuracy(1,:); %OverallaccuracyTest_QDA
% results(i,5)=cm_stats_testQDA.sensitivity(2,:); %Dis(grp 2) sensitivity
end

% Plotting classification accuracy ###############
% results holds the TEST-set metrics per component count:
% col1: components, col2: LDA overall accuracy,
% col3: LDA sensitivity (group 2), col4: LDA specificity (group 2)
% (the earlier "col3:QDA" note was stale — only LDA metrics are stored)
figure, plot(results(:,1),results(:,2),'--r')
hold on
plot(results(:,1),results(:,3),'--b')
plot(results(:,1),results(:,4),'--g')
% FIX: legend labels corrected to match the plotted columns
legend('LDA-Overall accuracy','LDA-Sensitivity','LDA-Specificity')

% % view results;
% results(:,2) %overall accuracy
% results(:,3) %sensitivity 
% results(:,4) %specificity



% Score plots#####################
% for the X scores: test-set components 1 vs 3, coloured by class
figure();
h1=gscatter(xTest(:,1),xTest(:,3),yTest,'krb','ov^',[],'off');
legend('Healthy','Infected')

% for the Y scores
% NOTE(review): YscoresTest is never defined — its assignment
% ("YscoresTest=Yscores(idx,:);") is commented out in Section 2, so
% this call errors unless that line is uncommented first and
% Yscores/idx from Section 1 are still in the workspace.
figure();
h2=gscatter(YscoresTest(:,2),YscoresTest(:,3),yTest,'krb','ov^',[],'off');


%####(do after classification) refit PLSR on the test set######
cSelected=2;
[XloadingsC,YloadingsC,XscoresC,YscoresC,betaPLS10C,PLSPctVarC,PLSmsepC,statsC] = plsregress(xTest,yTest,cSelected,'CV',10);

% PLS weights
% BUG FIX: statsC.W has only cSelected columns, so W(:,4) errored with
% cSelected=2 — plot the last retained component instead. Note xTest
% here holds PLS scores (not spectra), so the x-axis is the predictor
% index rather than wavelength (wls has a different length).
figure,plot(statsC.W(:,cSelected),'-');
xlabel('Predictor (component) index');
ylabel('PLS Weight');
 
% regression coefficients (betaPLS10C(1) is the intercept)
% BUG FIX: betaPLS10C(2:end) has one entry per column of xTest, which
% does not match length(wls) here — plot against predictor index.
figure,plot(betaPLS10C(2:end),'b-o');
xlabel('Predictor (component) index');
ylabel('Coefficients');