% Developed by ASA Salgadoe (asalgado@myune.edu.au, surantha_a@yahoo.com)
% on 29/10/2019
% New test of choosing wavelengths that can separate the healthy and control groups.
% PLSR (dimension reduction) + LDA or QDA for classification
% ############################################
load('allDAI.mat'); % col1: week, col2: trt, col3 onwards: spectral bands
% for Pc infected
wls=allDAI(1,3:end);
dataSet=allDAI(2:end,:);
cat=dataSet(:,1:2);

% remove first few spectra (for noise removal)
% subset the wavelengths from 400 nm onwards
loc400=find(wls==400);
wls=wls(:,loc400:end);

% All sampled data: col1-treeId, col2-group, col3 onwards-bands
% RAW spectra
spec=dataSet(:,3:end);
spec=spec(:,loc400:end);
dataClean=[cat,spec];
dataClean=sortrows(dataClean,1);

% % % 1st method: analyse as healthy vs diseased groups
% % % 2nd method: analyse as healthy vs diseased per each week

% % for 1st method
% grp_trt=dataSet(:,2:end);
% grp_trt=sortrows(grp_trt,1);

% for 2nd method
% DAI codes 1,2,3,4,5,6,7
% (index against dataClean, not dataSet, because dataClean has been re-sorted)
grp_trt=dataClean(dataClean(:,1)==7,2:end);
grp_trt=sortrows(grp_trt,1);

% % % % % for water stressed#############
% % % % load('glassH_WS_26_9_2017_s3.mat')
% % % % dataWS=glassH_WS_26_9_2017_s3;
% % % % wls=dataWS(1,3:end);
% % % % dataSetW=dataWS(2:end,:);
% % % % catW=dataSetW(:,2);
% % % %
% % % % % remove first few spectra (for noise removal)
% % % % % subset the wavelengths from 400 nm onwards
% % % % loc400=find(wls==400);
% % % % wls=wls(:,loc400:end);
% % % %
% % % % % All sampled data: col1-treeId, col2-group, col3 onwards-bands
% % % % % RAW spectra
% % % % specW=dataSetW(:,3:end);
% % % % specW=specW(:,loc400:end);
% % % % dataCleanW=[catW,specW];
% % % % dataCleanW=sortrows(dataCleanW,1);
% % % % grp_trt=dataCleanW;
% % % % grp_trt=sortrows(grp_trt,1);
% #####################################

% assignment of X and Y
x=grp_trt(:,2:end); % spectra
y=grp_trt(:,1);     % categories

% **********************************************
% Normalization of each spectrum::::::::::::::::(if required)
% Euclidean norm function (see the normSpec sketch at the end of this script)
% Ref>>>folder7.3/paper28
% % spec_N=spec; %normalized
% % for k=1:length(spec_N(:,1))
% %     spec_N(k,:)=normSpec(spec_N(k,:));
% % end
% %
% % spectra1=[detailsCol1,spec_N];
% **********************************************

%################# performing PLSR #####################
componentRange=10;
[Xloadings,Yloadings,Xscores,Yscores,betaPLS10,PLSPctVar,PLSmsep,stats] = plsregress(x,y,componentRange,'CV',10);

% ---------(optional) plots for inspecting the variance explained by the fitted components
% PLSPctVar: 1st row for X, 2nd row for Y variables
% the amount of variance explained in X or Y
figure,plot(1:componentRange,cumsum(100*PLSPctVar(1,:)),'-bo');
xlabel('Number of PLS components');
ylabel('Percent Variance Explained in X')

% Selecting the best number of components
% Cross-validation results: lowest MSEP
% PLSmsep: 1st row for X, 2nd row for Y variables
figure,plot(0:componentRange,PLSmsep(1,:));
xlabel('Number of components');
ylabel('Estimated Mean Squared Prediction Error');

% %####(do after classification) decided on a selected number of components######
cSelected=3;
[XloadingsC,YloadingsC,XscoresC,YscoresC,betaPLS10C,PLSPctVarC,PLSmsepC,statsC] = plsregress(x,y,cSelected,'CV',10);

% coefficients for the selected number of components
figure,plot(wls,betaPLS10C(2:end),'b-o');
xlabel('Wavelength (nm)');
ylabel('Coefficients');
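% ---------(optional, not in the original workflow)-----------------------------
% A minimal sketch of picking the component count from the cross-validation
% curve instead of reading it off the MSEP plot above. It assumes the second
% row of PLSmsep (the cross-validated MSEP for the response y) has a clear
% minimum; the first entry of that row corresponds to 0 components, hence the -1.
[~,idxMinMSEP]=min(PLSmsep(2,:));
cAuto=idxMinMSEP-1; % candidate value for cSelected
fprintf('Lowest cross-validated response MSEP at %d components\n',cAuto);
% ------------------------------------------------------------------------------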
%########## Partitioning data into training and test sets
% after PLSR the categories are the same but the X variables are components,
% so the data set is now the combination of categories and the Xscores of each component
categories=y;
components=Xscores;
% components=x;
dataGroup = [categories,components];

% Hold-out partition (train: 70%, test: 30%)
% passing the class labels (rather than a size) makes the split stratified,
% maintaining the class proportions
cv = cvpartition(categories,'HoldOut',0.3);
idx = cv.test;

% Separate into training and test data
dataTrain = dataGroup(~idx,:);
dataTest  = dataGroup(idx,:);
size(dataTrain)
size(dataTest)

% % storing the partitioned data for future use**************************
% DAI7_train=dataTrain;
% DAI7_test=dataTest;
% *******************************************

% ##############Section 2 - after data partitioning#################
% this part can be run with the stored test and train datasets
% re-loading x and y
% load the relevant training and test data sets
load('DAI7_train.mat')
load('DAI7_test.mat')
dataTrain=DAI7_train;
dataTest=DAI7_test;

componentRange=10; % maximum number of components considered
yTrain=dataTrain(:,1);
xTrain=dataTrain(:,2:end);
yTest=dataTest(:,1);
xTest=dataTest(:,2:end);

% obtaining the Y scores of the test set for the score plots further below
% (uncomment; requires Yscores and idx from Section 1 in the workspace)
% YscoresTest=Yscores(idx,:);

% ################ Training LDA or QDA models ####################
% CLASSIFICATION model
results=0;
% for i=1:componentRange
for i=3
    compN=i; % the number of components used, i.e. score columns 1:compN

    % for training
    train_Sbands=xTrain(:,1:compN); % the scores of the selected component(s)
    trainCat=yTrain;

    % for testing/new predictions
    test_Spec=xTest(:,1:compN); % components; make sure the number of components matches the training set
    test_labels=yTest; % categories

    % Training models#####################
    % linear discriminant------------------
    MdlLinear = fitcdiscr(train_Sbands,trainCat,'DiscrimType','linear');
    % quadratic discriminant------------
    MdlQuadratic = fitcdiscr(train_Sbands,trainCat,'DiscrimType','quadratic');

    % training accuracy (LDA)
    yfit_trainL=MdlLinear.predict(train_Sbands);
    cm_stats_trainL=confusionmatStats(trainCat,yfit_trainL);
    cm_stats_trainL.accuracy;
    cm_stats_trainL.confusionMat;

    % training accuracy (QDA)
    yfit_train=MdlQuadratic.predict(train_Sbands);
    cm_stats_train=confusionmatStats(trainCat,yfit_train);
    cm_stats_train.accuracy;
    cm_stats_train.confusionMat;

    % Testing models#####################
    % prediction (LDA)
    yfit_predictLDA=MdlLinear.predict(test_Spec);
    cm_stats_testLDA=confusionmatStats(test_labels,yfit_predictLDA);
    % cm_stats_testLDA.accuracy;

    % prediction (QDA)
    yfit_predictQDA=MdlQuadratic.predict(test_Spec);
    cm_stats_testQDA=confusionmatStats(test_labels,yfit_predictQDA);
    % cm_stats_testQDA.accuracy;

    % Reporting results of classification
    results(i,1)=compN;
    results(i,2)=cm_stats_testLDA.accuracy(1,:);    % overall test accuracy (LDA)
    results(i,3)=cm_stats_testLDA.sensitivity(2,:); % diseased (grp 2) sensitivity
    results(i,4)=cm_stats_testLDA.specificity(2,:); % diseased (grp 2) specificity
    % results(i,4)=cm_stats_testQDA.accuracy(1,:);    % overall test accuracy (QDA)
    % results(i,5)=cm_stats_testQDA.sensitivity(2,:); % diseased (grp 2) sensitivity
end

% Plotting classification accuracy ###############
% results holds the test accuracies for each number of components
% results col1: components, col2: LDA overall accuracy, col3: LDA sensitivity, col4: LDA specificity
figure, plot(results(:,1),results(:,2),'--r')
hold on
plot(results(:,1),results(:,3),'--b')
hold on
plot(results(:,1),results(:,4),'--g')
legend('LDA-overall accuracy','LDA-sensitivity','LDA-specificity')

% % view results;
% results(:,2) % overall accuracy
% results(:,3) % sensitivity
% results(:,4) % specificity
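% ---------(optional, not in the original workflow)-----------------------------
% confusionmatStats is a third-party helper, not a built-in function. If it is
% unavailable, a minimal sketch of the same test-set metrics for this two-class
% case (assuming class 1 = healthy, class 2 = diseased, as above) can be built
% from the built-in confusionmat:
CM=confusionmat(test_labels,yfit_predictLDA); % rows: true class, columns: predicted class
accLDA=sum(diag(CM))/sum(CM(:));              % overall accuracy
sensLDA=CM(2,2)/sum(CM(2,:));                 % sensitivity for the diseased class
specLDA=CM(1,1)/sum(CM(1,:));                 % specificity with respect to the diseased class
% ------------------------------------------------------------------------------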
% Score plots#####################
% for the X scores
figure();
h1=gscatter(xTest(:,1),xTest(:,3),yTest,'krb','ov^',[],'off');
legend('Healthy','Infected')

% for the Y scores (requires YscoresTest from Section 2 above)
figure();
h2=gscatter(YscoresTest(:,2),YscoresTest(:,3),yTest,'krb','ov^',[],'off');

%####(do after classification) decided on a selected number of components######
% note: this refit plots against wls, so xTest must contain spectra (one column
% per wavelength); if xTest holds PLS scores, as in Section 2, the axes will not match
cSelected=2;
[XloadingsC,YloadingsC,XscoresC,YscoresC,betaPLS10C,PLSPctVarC,PLSmsepC,statsC] = plsregress(xTest,yTest,cSelected,'CV',10);

% weights
figure,plot(wls,statsC.W(:,cSelected),'-');
xlabel('Wavelength (nm)');
ylabel('PLS Weight');

% coefficients
figure,plot(wls,betaPLS10C(2:end),'b-o');
xlabel('Wavelength (nm)');
ylabel('Coefficients');
% ******************************************
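% ---------(optional, not in the original workflow)-----------------------------
% normSpec, called in the commented-out normalization block near the top, is not
% defined in this file. The sketch below is an assumption based on the
% "Euclidean norm function" comment: it divides one spectrum by its L2 norm.
% It is added as a local function, which scripts support from R2016b onwards.
function sN = normSpec(s)
% Normalize a single spectrum (row vector) to unit Euclidean length.
sN = s./norm(s);
end
% ------------------------------------------------------------------------------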