
% This is an example of using an efficient variant of KMSDA [1]. 
% The first MONK's problem data set (monk1) from the UCI repository is used [2].
%
%
% Related references:
%
% 1. N. Gkalelis, V. Mezaris, I. Kompatsiaris, "Mixture subclass 
% discriminant analysis", IEEE Signal Processing Letters, vol. 18, no. 5,
% pp. 319-322, May 2011
%
% 2. A. Frank and A. Asuncion, UCI machine learning repository, 2010.
% http://archive.ics.uci.edu/ml
%
%
% Author: Nikolaos Gkalelis - CERTH-ITI
% Email: gkalelis@iti.gr
%
% Created 01 Dec 2013.
%
%

clear all; close all;

% dataset
dbMatFile = 'monk1'; % name of the MAT-file that is loaded further below
noOfTrials = 1; % use split provided by creators of dataset

% prepare input parameter structure (passed to learn_kmsda / test_kmsda)
prm.noOfRandSplitsLearn = 2;
prm.learnRatio = 0.8;
prm.normType = 'zeroMean'; % 'zeroMean','unityNorm','NoNorm'
prm.distType = 'euclidean'; % 'euclidean','cosine'
prm.Hmx = 3; % user defined maximum allowed number of subclasses
prm.Nmin_sub = 6; % minimum allowed number of observations per subclass

% Kernel parameters
prm.kernel_opts.kernel_values = 0.1:0.2:1.3; % range of values for identifying optimal kernel parameter
prm.kernel_opts.KernelType = 'Gaussian'; % kernel type: linear Polynomial PolyPlus Gaussian
prm.kernel_opts.Kernel = 1; % 1: data have not been kernelized
prm.kernel_opts.Regu = 0.001; % kernel regularization parameter

% trace: the log file name encodes the dataset and the main settings,
% e.g. log_notebook_kmsda_monk1_zeroMean_euclidean_8.txt
logFile = ['log_notebook_kmsda_', dbMatFile, '_', prm.normType, '_', ...
    prm.distType, '_', num2str(prm.learnRatio*10), '.txt'];

% open in append mode so that results of successive runs accumulate;
% fail early with the system message if the file cannot be opened
[fd_m, openErrMsg] = fopen(logFile, 'a');
if fd_m == -1
    error('notebook_kmsda:logOpenFailed', ...
        'Cannot open log file "%s": %s', logFile, openErrMsg);
end

% load data; the MAT-file must provide the observation matrix fea (one
% observation per row) and the label vector gnd, both are used below
load(dbMatFile);
if ~exist('fea', 'var') || ~exist('gnd', 'var')
    error('notebook_kmsda:badDataFile', ...
        'Data file "%s" must define the variables fea and gnd.', dbMatFile);
end

% dataset details and label normalization from 1 to C:
% the third output of unique gives, for every label, the position of that
% label in the sorted list of distinct labels, i.e. the normalized label
origGnd = gnd;
[classLbls, ~, normLblIdx] = unique(origGnd); % original class labels
numberOfClasses = numel(classLbls);
gnd = reshape(normLblIdx, size(origGnd)); % keep the orientation of the original vector

% class labels after normalization (unique already returns them sorted)
classLbls = unique(gnd);

% if applicable test different partitions of the data to get robust
% performance estimation
trial.first = 1;
trial.last = noOfTrials;

% initialize per-trial result records
CCR = zeros(1, trial.last - trial.first +1); % CCR reported by test_kmsda
H = CCR; % record total subclasses
KerVal = CCR; % record optimum kernel parameter

% keep the user-defined configuration untouched, so that every trial starts
% from the same parameters and nothing learned in one trial (kernel
% options, projection, sorted training data) leaks into the next one
prmBase = prm;

try
    for i=trial.first:trial.last
        prm = prmBase; % reset to the initial configuration

        strPartitionMatrix = sprintf('%d.mat', i); % partition matrix file: 1.mat, 2.mat, ...
        load(strPartitionMatrix); % load indexes for this random split: trainIdx, testIdx
        prm.fea_Train = fea(trainIdx, :); % training set at each row
        prm.gnd_Train = gnd(trainIdx); % labels

        res_learn = learn_kmsda(prm);

        % hand the learned model over to the test stage
        prm.W = res_learn.W;
        prm.H = res_learn.H;
        prm.kernel_opts = res_learn.kernel_opts;
        prm.fea_Mean = res_learn.fea_Mean;
        prm.fea_Train = res_learn.fea_Train; % keep the sorted version for the kernel evaluations
        prm.gnd_Train = res_learn.gnd_Train;
        prm.fea_Train_Proj = res_learn.fea_Train_Proj; % projected train set
        prm.fea_Test = fea(testIdx, :); % test set
        prm.gnd_Test = gnd(testIdx);

        res_test = test_kmsda(prm);

        idx = i - trial.first +1; % position of this trial in the result records
        CCR(idx) = res_test.CCR;
        H(idx) = res_learn.H;
        KerVal(idx) = res_learn.kernel_opts.t;

        % %g is used for the real-valued quantities: with %d MATLAB falls
        % back to exponential notation for non-integer values
        fprintf(fd_m, 'notebook_kmsda>> Trial (%d) CCR (%g) H (%d) Kernel-Value(%g)\n', i, CCR(idx), H(idx), KerVal(idx));
    end

    % average CCR over all trials
    ACCR = mean(CCR);

    fprintf(fd_m , 'notebook_kmsda>> Using CV criterion CCR (%g)\n', ACCR);
catch trialErr
    % do not leave the log file open when a trial fails
    fclose(fd_m);
    rethrow(trialErr);
end

fclose(fd_m);

