
% This is an example of using MSDA [1,2].
% The Wisconsin Breast Cancer Data Set of UCI repository is used [3].
%
%
% Related references:
%
% 1. N. Gkalelis, V. Mezaris, I. Kompatsiaris, "Mixture subclass 
% discriminant analysis", IEEE Signal Processing Letters, vol. 18, no. 5,
% pp. 319-322, May 2011
%
% 2. N. Gkalelis, V. Mezaris, I. Kompatsiaris, T. Stathaki, "Mixture
% subclass discriminant analysis link to restricted Gaussian model and
% other generalizations", IEEE Transactions on Neural Networks and Learning 
% Systems, vol. 24, no. 1, pp. 8-21, January 2013.
%
% 3. A. Frank and A. Asuncion, UCI machine learning repository, 2010.
% http://archive.ics.uci.edu/ml
%
%
% Author: Nikolaos Gkalelis - CERTH-ITI
% Email: gkalelis@iti.gr
%
% Created 01 Aug 2013.
%

clear all; close all;

% -------------------------------------------------------------------------
% Demo script: learn an MSDA model on each predefined train/test partition
% of the dataset, evaluate it on the test part, and append the per-split
% and mean correct classification rate (CCR) to a log file.
%
% Requires on the MATLAB path / current folder:
%   - <dbMatFile>.mat    providing the variables fea and gnd
%   - 1.mat, 2.mat, ...  one per split, providing trainIdx and testIdx
%   - learn_msda.m, test_msda.m
% -------------------------------------------------------------------------

% dataset
dbMatFile = 'wbcd';
trainRatio = 5; % ratio of overall dataset that is used for training
                % NOTE(review): not referenced anywhere in this script
noOfRandSplits = 1; % use split provided by creators of dataset

% prepare input parameter structure
prm.WithinScatter = 3;
prm.noOfRandSplitsLearn = 3;
prm.learnRatio = 0.6;
prm.normType = 'unityNorm'; % 'zeroMean','unityNorm'
prm.distType = 'cosine'; % 'euclidean','cosine'
prm.useCV = 1; % use cross-validation (1) or stability (2) criterion for learning optimum MSDA parameters
prm.Hmx = 100; % user defined maximum allowed number of subclasses
prm.Nmin_sub = 6; % minimum allowed number of observations per subclass

% trace: the log file name encodes the dataset and the main settings
logFile = ['log_notebook_msda_', dbMatFile, '_', prm.normType, '_', ...
    prm.distType, '_', int2str(prm.WithinScatter), '_', ...
    num2str(prm.learnRatio*10), '.txt'];
[fd_m, openMsg] = fopen(logFile, 'a');
if fd_m == -1
    % fail early with a clear message instead of a later fprintf error
    error('notebook_msda:logOpenFailed', ...
        'Cannot open log file "%s" for appending: %s', logFile, openMsg);
end

% Everything that may throw runs inside try/catch so that the log file is
% closed even when loading, learning or testing fails.
try
    % load data
    load(dbMatFile);
    if ~exist('fea', 'var') || ~exist('gnd', 'var')
        error('notebook_msda:missingData', ...
            'File "%s" must provide the variables fea and gnd.', dbMatFile);
    end

    % dataset details
    classLbls = unique(gnd); % original class labels (sorted by unique)
    numberOfClasses = length(classLbls);

    % normalize labels from 1 to C
    origGnd = gnd; clear gnd;
    gnd = zeros(size(origGnd));
    for cc = 1:numberOfClasses
        gnd(origGnd == classLbls(cc)) = cc;
    end
    classLbls = unique(gnd); % now 1..C; unique already returns sorted values

    % if possible test different partitions of the data to get robust estimation
    cvCycle.first = 1;
    cvCycle.last = noOfRandSplits;

    % initialize
    CCR = zeros(1, cvCycle.last - cvCycle.first + 1);
    H = CCR; % record total subclasses
    for i = cvCycle.first:cvCycle.last
        % partition file for this split: '<i>.mat' with trainIdx, testIdx
        strPartitionMatrix = [int2str(i), '.mat'];
        load(strPartitionMatrix);

        % learning stage
        % NOTE(review): from the second split on, prm still carries the
        % W/H/fea_Mean/fea_Test/gnd_Test fields of the previous split when
        % it is passed to learn_msda - confirm that learn_msda ignores them.
        prm.fea_Train = fea(trainIdx, :); % training set at each row
        prm.gnd_Train = gnd(trainIdx); % labels
        res_learn = learn_msda(prm);

        % testing stage: hand the learned model and the test data to test_msda
        prm.W = res_learn.W;
        prm.H = res_learn.H;
        prm.fea_Mean = res_learn.fea_Mean;
        prm.fea_Test = fea(testIdx, :); % test set
        prm.gnd_Test = gnd(testIdx);
        res_test = test_msda(prm);

        idx = i - cvCycle.first + 1;
        CCR(idx) = res_test.CCR;
        H(idx) = res_learn.H;
        % %g instead of %d for CCR: with %d MATLAB falls back to %e
        % (e.g. 9.561000e-01) whenever the value is not an integer
        fprintf(fd_m, 'notebook_msda>> CV (%d) CCR (%g) H (%d)\n', i, CCR(idx), H(idx));
    end

    CCR_fin = mean(CCR); % average CCR over all evaluated splits

    if prm.useCV == 1
        fprintf(fd_m , 'notebook_msda>> Using CV criterion CCR (%g)\n', CCR_fin);
    else
        fprintf(fd_m , 'notebook_msda>> Using stability criterion CCR (%g)\n', CCR_fin);
    end
catch err
    fclose(fd_m); % do not leak the log file handle on failure
    rethrow(err);
end

fclose(fd_m);
