
function res = learn_kmsda(prm)
%
% function res = learn_kmsda(prm)
%
% Learn the projection matrix and other KMSDA parameters (e.g. best 
% subclass partition, mean for zero mean sample normalization, etc.) using
% a training set and an appropriate criterion [1]. A cross-validation 
% (CV) procedure is used to learn the optimum parameters. At each CV cycle
% a new subclass partition and kernel value are validated. The splitting at
% each CV cycle is performed using a nongaussianity criterion based on
% the negentropy increment [2,3]. The quality of a partition is assessed
% using the correct classification rate (CCR) along the CV cycles. For
% classification we use the nearest neighbour classifier in the KMSDA
% subspace.
%
%
% IN
%
% prm : input parameter structure with the following fields:
%
%  fea_Train: training feature vectors N x F
%
%  gnd_Train: ground truth labels N x 1
%
%  Nmin_sub: the minimum permitted number of observations in a subclass
%
%  Hmx: user defined maximum allowed total number of subclasses
%
%  normType: method to use for normalizing the observations: 'zeroMean'
%  (subtract the mean training sample), 'unityNorm' (scale every sample to
%  unit norm) or 'NoNorm' (no normalization at all). Any other value is
%  treated as 'NoNorm' and a warning is issued.
%
%  distType: similarity (or dissimilarity) measure to use for comparing
%  observations, i.e. Euclidean distance, or cosine similarity.
%
%  kernel_opts: parameters of the kernel function
%               kernel_values: range of values for identifying optimal kernel parameter
%               KernelType: type of kernel - linear, Polynomial, PolyPlus, Gaussian
%               Kernel: 1 - data have not been kernelized
%               Regu: kernel regularization parameter
%
%  noOfRandSplitsLearn: number of CV splits for learning the optimal KMSDA
%  parameters
%
%  learnRatio: ratio of training samples to use at each CV cycle for 
%  learning the KMSDA parameters, e.g., if learnRatio is 0.7 the validation
%  ratio for testing the computed parameters is 0.3 (at each CV split, 70%
%  of the training observations are used for learning the KMSDA parameters
%  and 30% of the observations are used for testing this set of parameters)
% 
%
% OUT
%
% res: output parameter structure with the following fields:
%
%  fea_Mean: mean sample vector 1 x F (empty [] unless zero mean
%  normalization is used)
%
%  W: optimum projection matrix (according to optimum subclass partition and kernel parameters)
%
%  H: optimum number of total subclasses
%
%  kernel_opts: the identified optimum kernel options (field t holds the
%  selected kernel value)
%
%  fea_Train: sorted training data for computing test data in the new feature space
%
%  gnd_Train: the respective training data labels
%
%  fea_Train_Proj: training observations projected in the KMSDA subspace
%
%
% ERRORS
%
%  learn_kmsda:invalidInput    - no kernel values or no CV splits were given
%  learn_kmsda:logFile         - the log file could not be opened
%  learn_kmsda:noSubclassRoom  - no additional subclass can be validated
%                                (Hmx and/or Nmin_sub too restrictive)
%  learn_kmsda:tooFewSamples   - a subclass with too few samples was reached
%                                during the CV partitioning
%  learn_kmsda:noValidCCR      - no valid CCR value was produced by the CV
%
% SIDE EFFECTS
%
%  Trace messages are appended to a log file in the current directory
%  (log_learn_kmsda_<normType>_<distType>_<learnRatio*10>.txt).
%
%
% Related references:
%
%
% 1. N. Gkalelis, V. Mezaris, I. Kompatsiaris, T. Stathaki, "Mixture
% subclass discriminant analysis link to restricted Gaussian model and
% other generalizations", IEEE Transactions on Neural Networks and Learning 
% Systems, vol. 24, no. 1, pp. 8-21, January 2013.
%
% 2. Luis F. Lago-Fernandez, Fernando J. Corbacho: Using the Negentropy
% Increment to Determine the Number of Clusters. IWANN (1) 2009: 448-455
%
% 3. Luis F. Lago-Fernandez, Fernando J. Corbacho: Normality-based
% validation for crisp clustering. Pattern Recognition 43(3): 782-795
% (2010)
%
%
%
% Author: Nikolaos Gkalelis - CERTH-ITI
% Email: gkalelis@iti.gr
%
% Created 01 Dec 2013.
%


%% read input parameters
learnRatio = prm.learnRatio;
noOfRandSplitsLearn = prm.noOfRandSplitsLearn;
normType = prm.normType;
distType = prm.distType;
Nmin_sub = prm.Nmin_sub;
Hmx = prm.Hmx;
fea = prm.fea_Train;
gnd = prm.gnd_Train;

% Kernel parameters
kernel_opts.kernel_values = prm.kernel_opts.kernel_values;
kernel_opts.KernelType = prm.kernel_opts.KernelType; 
kernel_opts.Kernel = prm.kernel_opts.Kernel; 
kernel_opts.Regu = prm.kernel_opts.Regu; 
Ngp = length(kernel_opts.kernel_values); % number of grid points for the kernel parameters

clear prm;

% Without at least one kernel value and one CV split nothing can be
% validated and the parameter selection below would fail on empty arrays.
if Ngp < 1
    error('learn_kmsda:invalidInput', 'kernel_opts.kernel_values must contain at least one value');
end
if noOfRandSplitsLearn < 1
    error('learn_kmsda:invalidInput', 'noOfRandSplitsLearn must be at least 1');
end

valRatio = 1- learnRatio; % validation ratio

kmsda_opts = []; % KMSDA options (none); shared by the CV and the final stage

%% trace
logFile = char( strcat('log', '_', 'learn_kmsda', '_' , normType, '_', distType, '_', num2str(learnRatio*10), '.txt'));
fd = fopen(logFile, 'a');
if fd == -1
    error('learn_kmsda:logFile', 'Cannot open log file %s for appending', logFile);
end
% Close the log file whenever the function exits, including on errors.
logCleanup = onCleanup(@() fclose(fd)); %#ok<NASGU>

fprintf(fd , 'learn_kmsda>> Entering\n');

%% sort data according to their ground truth label
[fea, gnd] = sortFeaLabels(fea, gnd); % now size is samples x features

%% normalize training set and learn total mean (using all training set !!!)
N = size(fea, 1);
if strcmp(normType,'zeroMean')
    fea_Mean = mean(fea,1); % mean of ALL TRAIN samples
    fea = fea - repmat(fea_Mean, N,1);
    res.fea_Mean = fea_Mean; % keep parameter !!!
elseif strcmp(normType,'unityNorm')
    for n=1:N
        fea(n,:) = fea(n,:) ./ max(eps,norm(fea(n,:))); % eps guards all-zero samples
    end
    res.fea_Mean = []; % dummy !!!
elseif strcmp(normType,'NoNorm')
    res.fea_Mean = []; % dummy !!!
else
    % Unknown option: data are left untouched (as before), but the output
    % field is now always defined and the caller is told about it.
    warning('learn_kmsda:unknownNormType', 'Unrecognized normType; no normalization applied');
    fprintf(fd , 'learn_kmsda>> Unrecognized normType; no normalization applied\n');
    res.fea_Mean = []; % dummy !!!
end

%% learning
% learn optimal subclass partition using a cross validation procedure

% compute maximum allowable number of subclasses
classLbl = sort(unique(gnd));
C = length(classLbl);
Nci = zeros(1,C); % number of observations per class
for i=1:C
    Nci(i) = sum(gnd == classLbl(i)); % count by label value, not by loop index
end
% A subclass needs at least one observation; this also avoids dividing by 0 below.
Nmin_sub = max(1, min(Nmin_sub, floor(max(Nci)/2)));
Hip = Nci; % maximum allowable subclasses per class
for i=1:C
    Hip(i) = max(1, floor(Nci(i)/Nmin_sub));
end
Hm = sum(Hip); % maximum allowable total number of subclasses
Hm = min(Hm, Hmx);
Hp = Hm - C; % maximum allowable additional subclasses

% The CV below only validates partitions with at least one additional
% subclass, so there must be room for one.
if Hp < 1
    fprintf(fd , 'learn_kmsda>> No additional subclasses allowed (classes: %d, maximum subclasses: %d)\n', C, Hm);
    error('learn_kmsda:noSubclassRoom', ...
        'No additional subclasses can be validated (classes: %d, maximum total subclasses: %d); increase Hmx or decrease Nmin_sub', C, Hm);
end

NG_H = zeros(1, Hp); % negentropy - additional number of subclasses

% CV procedure to select optimum subclass partition
% record CCR for this subclass partition if CV criterion is used        
CCR = zeros(Hp,Ngp,noOfRandSplitsLearn);

for  cv = 1:noOfRandSplitsLearn
    
    fprintf(fd , '\n------------------------------------------------------------------------------------------\n');
    fprintf(fd , 'learn_kmsda: split: (%d)\n', cv);
    fprintf(fd , '\n------------------------------------------------------------------------------------------\n');
    
    % split data for this CV (stratified: a separate holdout split per class)
    trainIdx = false(1,N); % initialize
    testIdx = trainIdx;
    for i=1:C
        c = cvpartition(sum(gnd == classLbl(i)), 'holdout', valRatio);
        trainIdx(gnd == classLbl(i)) = training(c)';
        testIdx(gnd == classLbl(i)) = test(c)';
    end

    fea_Train = fea(trainIdx, :); % training set for this CV
    gnd_Train = gnd(trainIdx);
    fea_Test = fea(testIdx, :); % test set
    gnd_Test = gnd(testIdx);

    % sort data according to their ground truth label
    [fea_Train, gnd_Train] = sortFeaLabels(fea_Train, gnd_Train);
    [fea_Test, gnd_Test] = sortFeaLabels(fea_Test, gnd_Test);
    
    gnd_Train_subclass = ones(length(gnd_Train), 1); % we first initialize one subclass per class
    
    % Record classification rates for the different parameters
    for h = 1:Hp % total number of additional subclasses
        
        % select class to repartition and increment number of subclasses for the selected class
        [c_id, NG_H(h)] = selectClassToRePartitionPost( fea_Train, gnd_Train, gnd_Train_subclass, distType, Nmin_sub); % select Class to Re-Partition
        Hi = length(unique( gnd_Train_subclass( gnd_Train == c_id, 1 ))); % current number of subclasses of Class to Re-Partition
        [fea_Train_i_tmp, gnd_i_tmp, Ni, stopPartitioningThisClass] = incrementSubclassesOfClass( fea_Train( gnd_Train == c_id, :), Hi+1, distType, Nmin_sub);
        clear Ni Hi;
        fea_Train( gnd_Train == c_id, :) = fea_Train_i_tmp; % sort class observations and ground according to the new subclass labelling
        gnd_Train_subclass(gnd_Train == c_id, :) = gnd_i_tmp;
        clear c_id fea_Train_i_tmp gnd_i_tmp;
        
        if stopPartitioningThisClass == true
            fprintf(fd , 'learn_kmsda>> Terminating partitioning as reached to subclass with very few samples\n');
            error('learn_kmsda:tooFewSamples', ...
                'learn_kmsda>> Terminating partitioning as reached to subclass with very few samples');
        end
        
        % evaluate different kernels for this partition
        for gp = 1:Ngp % number of grid points for the kernel parameters
            
            kernel_opts.t = kernel_opts.kernel_values(gp); % kernel value under validation
            fprintf(fd , '\n');
            fprintf(fd , 'learn_kmsda>> Validating: Subclasses(%d) Kernel-value(%d)\n', C+h, kernel_opts.t);
        
            % compute projection matrix using the training part of this CV split
            fea_Train_kernel = constructKernel(fea_Train, fea_Train, kernel_opts);
            W = KMSDA(fea_Train_kernel, gnd_Train, gnd_Train_subclass, kmsda_opts); % compute projection matrix using KMSDA
            fea_Train_Proj = fea_Train_kernel* W; % project train set
            
            % project test set in the new feature space
            fea_Test_kernel = constructKernel(fea_Test, fea_Train, kernel_opts);
            fea_Test_Proj = fea_Test_kernel* W;

            % classify (1-nearest neighbour in the projected space)
            recognized = knnclassify(fea_Test_Proj, fea_Train_Proj, gnd_Train, 1, distType, 'nearest');
            CCR(h,gp,cv) = sum(gnd_Test == recognized)/length(gnd_Test);
            fprintf(fd , 'learn_kmsda>> CV(%d): Subclasses(%d) Kernel-value(%d) - CCR(%d)\n', cv, C+h, kernel_opts.t, CCR(h,gp,cv));
            
            clear fea_Train_kernel fea_Train_Proj fea_Test_kernel fea_Test_Proj W recognized;
            
        end % kernel values
        
    end % subclasses
    
    clear fea_Train gnd_Train fea_Test gnd_Test;
    
end % CV procedure

%% identify optimum parameters
ACCR = mean(CCR, 3); % average along different CVs
accr_max = max(max(ACCR));
[h_o, gp_o] = find(ACCR == accr_max, 1, 'first'); % h_o: optimum number of additional subclasses; gp_o: index of optimum kernel value
if isempty(h_o)
    % happens only when every averaged CCR is NaN (e.g. empty validation sets)
    fprintf(fd , 'learn_kmsda>> No valid CCR value was computed\n');
    error('learn_kmsda:noValidCCR', 'Cross-validation produced no valid CCR value');
end
kv_o = kernel_opts.kernel_values(gp_o); % optimum kernel value
fprintf(fd , 'learn_kmsda>> Optimum values: Subclasses(%d) Kernel-value(%d) - Maximum ACCR(%d)\n', C+h_o, kv_o, accr_max);

%% compute overall projection matrix (using all training data)

% increment to the optimum number of subclasses
[fea, gnd] = sortFeaLabels(fea, gnd);
gnd_subclass = ones(length(gnd), 1); % we first initialize one subclass per class
for h = 1:h_o
    [c_id, NG_H(h)] = selectClassToRePartitionPost( fea, gnd, gnd_subclass, distType, Nmin_sub); % select Class to Re-Partition
    Hi = length(unique( gnd_subclass( gnd == c_id, 1 ))); % current number of subclasses of Class to Re-Partition
    [fea_i_tmp, gnd_i_tmp, Ni, stopPartitioningThisClass] = incrementSubclassesOfClass( fea( gnd == c_id, :), Hi+1, distType, Nmin_sub);
    clear Ni;
    fea( gnd == c_id, :) = fea_i_tmp; % sort class observations and ground according to the new subclass labelling
    gnd_subclass(gnd == c_id, :) = gnd_i_tmp;
end

% compute projection matrix and record computed optimum values for evaluation stage
kernel_opts.t = kv_o; % record optimum kernel value
fea_kernel = constructKernel(fea, fea, kernel_opts);
res.W = KMSDA(fea_kernel, gnd, gnd_subclass, kmsda_opts); % compute projection matrix using KMSDA
res.fea_Train_Proj = fea_kernel* res.W; % project train set
res.H = C + h_o;
res.fea_Train = fea; % keep the sorted version of training features for the evaluation stage
res.gnd_Train = gnd;
res.kernel_opts = kernel_opts;

fprintf(fd , 'learn_kmsda>> Exiting\n');

% the log file is closed by logCleanup when the function returns


