function [Factors,fit,it,FitStory,ConvCond,LmSt,JacSV,Gr]=gnparafac(X,F,Options,varargin);
% function [Factors,fit,it,FitStory,ConvCond,LmSt,JacSV,Grad] = GNParafac (X,F,Options,A,B,C);
% 
% Description:
% Fits a PARAFAC model to a three way array using a Levenberg-Marquadt algorithm
% 
% Inputs: 
% X: array of doubles
% F: number of factors to extract
% Options (optional): see PAROptions
% A,B,C: initial estimations for the loading matrices
% 
% Outputs:
% Factors: cell vector with the estimations for the factors (each element contains a matrix with the loadings in the corresponding mode)
% 
% fit: final value of the loss function
% 
% it: number of iterations necessary to reach convergence
% 
% FitStory: values of the loss functions along the iterations
% 
% ConvCond : set to 1 when a the convergence criteria is met
% ConvCond(1) relative decrease in fit
% ConvCond(2) relative change in parameters
% ConvCond(3) ratio between loss function and total sum of squares
% ConvCond(4) gradient infinity norm
% ConvCond(5) J'*J matrix is nearly singular in more than 5 consecutive iterations
% ConvCond(6) max number of iterations reached
% LmSt: it contains the values of the Lambda damping parameter along with the iterations in the first column. The second column contain the "large" iteration number (i.e. the number of Jacobian estimations)
% 
% JackSV: singular values of the Jacobian (only for LM algorithm). See GNPARAFAC
% 
% Grad: gradient value upon convergence
% Called by:
% Model_PARAFAC\GenParafac
% 
% Subroutines:
% Internal:Names and hyperlinks
% External: cleanx, dtld, fac2let, gnlinesearch, gnparafac, initpar, kr, nmodel, nshape, parjac, parjacs, paroptions, pfloss, scale_factors, swatld
% 
% Author: 
% Giorgio Tomasi 
% Royal Agricultural and Veterinary University 
% MLI, LMT, Chemometrics group 
% Rolighedsvej 30 
% DK-1958 Frederiksberg C 
% Danmark 
% 
% Last modified: 15-May-2002 18:40:52
% 
% Contact: Giorgio Tomasi, gt@kvl.dk 
% 
% References
%


if nargin < 2
   Options = [];
end
if isempty(Options)
   Options = ParOptions;
end
Options = ParOptions(Options);
if strcmpi(Options.display,'none')
   Options.display = pi;
end

%Check for initial values and number of factors
Init = 1;
if nargin < 2 | (isempty(F) & isempty(varargin))
   error('The number of factor must be defined')
end
if ~isempty(varargin)
   if length(varargin) ~= ndims(X)
      error('Inadequate number of loading matrices')
   end
   if any(size(X) ~= cellfun('size',varargin,1))
      error('Initial values matrices not consistent with array dimensions')
   end
   F       = size(varargin{1},2);
   Init    = 0;
   Factors = varargin;
end

%individuate the missing and present values and eliminate the all missing values' slabs
[X,No{1:ndims(X)}] = CleanX(X);
DimX               = size(X);
Present            = ~isnan(X(:));
if any(~Present) & any(strcmpi({'dtld','swatld'},Options.initialisation))
   Options.initialisation = 'random';
end

%Initialise
if Init
   [conv,varargin{1:ndims(X)}] = InitPar(X,F,Options.initialisation,Options.inititer,Options.inittol);
else
   for i = 1:ndims(X)
      varargin{i} = varargin{i}(No{i},:);
   end
end

%Apply CLES
if strcmpi(Options.cles,'on')
   [varargin{1:ndims(X)}] = scale_factors(0,varargin{:});
end

%Some initial values
Beta   = 1;  %For MatLab(R) damping parameter update strategy
Cont1  = 0;  %Initialise the global "near" singular counter
conv   = 0;  %Not converged
it     = 0;  %Number of iterations
Nu     = 2;  %For H.B.Nielsen update scheme
LmSt   = []; %Lambda History

%Initial fit
[fit,fit0,FitStory] = deal(PFLoss(X,Present,0,varargin{:}));

%Total sum of squares
SSX            = sum(X(Present).^2);

%Show some diagnostics
if strcmpi(Options.diagnostics,'on')
   if F == 1
      disp(sprintf(' A %i component PARAFAC model',F))
   else
      disp(sprintf(' A %i components PARAFAC model',F))
   end
   disp([' will be fitted on a ' sprintf('%i',ndims(X)) '-way array of dimensions ',...
         sprintf('%i',DimX(1)) sprintf(' x %i',DimX(2:end))]);
   disp(' ')
   disp([' Initialisation: ',Options.initialisation])
   disp([' Max iterations:',num2str(Options.maxiter)]);
   if strcmpi(Options.cles,'on')
      disp(' CLES applied')
   end
   if strcmpi(Options.frin,'on')
      disp(' FRIN applied')
   end
   switch Options.linesearch
   case 'none'
      disp(' No line-search')
   case 'iter'
      disp('Line-search applied at each iteration')
   case 'diverg'
      disp('Line-search applied in case of divergence')
   end
   switch Options.normeqsolver
   case 'cholesky'
      disp(' Normal equations linear system solved by: Cholesky factorisation')
      disp(' Lambda increased if J''J is not positive definite')
   case 'ldivide'
      disp(' Normal equations linear system solved by: standard MatLab ldivide operator')
      disp(' Lambda increased if J''J is singular')
   case 'pcg'
      disp(' Normal equations linear system solved by: Preconditioned conjugate gradient')
      disp(sprintf('                               Tolerance: %1.1e',Options.pcgtol))
      disp(sprintf('                               Max iter: %i',Options.pcgmaxiter))
      if isempty(Options.pcgprecond)
         disp('                         Preconditioner:  none')
      end
   end
   switch Options.lambdaupdate
   case 'alpha'
      disp(' Lambda updated according to step-length')
   case 'hbn'
      disp(' Lambda updated according to Nielsen HB procedure')
   case 'levmar'
      disp(' Lambda updated according to gain ratio')
   end
   if strcmpi(Options.newton,'on')
      disp(' Second derivatives in use')
   elseif strcmpi(Options.separate,'on')
      disp(' Variable separation applied')
   end
   if strcmpi(Options.large,'on')
      disp(' Jacobian calculation adapted for large arrays')
   end
   disp([' Convergence criteria:   Relative fit  < ',num2str(Options.relfitconvcrit)])
   disp(['                         Update norm   < ',num2str(Options.parconvcrit)])
   disp(['                         Max(Gradient) < ',num2str(Options.gradconvcrit)])
   disp(['                         Loss function < ',num2str(Options.fitconvcrit*SSX)])
end

if ~rem(Options.display,1)
   disp(' ')
   disp(' Alpha                 Fit          It   EV %     Rho            Lambda             Lin. Decrease      Max Gr')
end

Factors = varargin; % Only for clarity, this is absolutely not necessary
%Start fitting
while ~conv & it <= Options.maxiter
   if strcmpi(Options.cles,'on')
      [Factors{1:ndims(X)}] = scale_factors(0,Factors{:});
   else
      [Factors{[2:ndims(X) 1]}] = scale_factors(1,Factors{[2:ndims(X) 1]});
   end
   ThetaFix    = {};
   Theta       = {};
   ThetaFix{1} = logical(zeros(DimX(1)*F,1));
   for i = 2:length(Factors)
      LFix        = zeros(size(Factors{i}));
      Theta{i}    = Factors{i}(:);
      ThetaFix{i} = logical(zeros(DimX(i)*F,1));
      if strcmpi(Options.frin,'on')
         % Fix the maximum value of each factor scores (i.e. columns of A) to max(X(:))
         [UseLess,MaxL]                                  = max(abs(Factors{i}));
         ThetaFix{i}(sub2ind(size(Factors{i}),MaxL,1:F)) = 1;
      end
   end
   FactorsOld = Factors;
   
   %calculate the J'*J and the Gradient
   if strcmpi(Options.separate,'on')
      %Apply variable separation, the first mode is "removed"
      ThetaFix = cat(1,ThetaFix{2:end});
      Theta    = cat(1,Theta{2:end});
      [JtJ,Gr] = ParJacSep(Factors(2:end),X,~ThetaFix);
   else
      ThetaFix   = cat(1,ThetaFix{:});
      Theta      = cat(1,Theta{:});
      if strcmpi(Options.large,'on')
         %Use Jacobian calculation for large matrices
         [JtJ,Gr] = ParJacS(Factors,X,~ThetaFix);
      else
         %Use normal (faster in interpreted m-files) Jacobian calculation
         [JtJ,Gr] = ParJac(Factors,X,~ThetaFix);
      end
   end
   
   %Calculate second derivatives
   if strcmpi(Options.newton,'on')
      Q = ParHes(Factors,X - nmodel(Factors),~ThetaFix);
   else
      Q = 0;
   end
   
   %Hessian estimation
   Phi = JtJ - Q;
   
   %Initialise the damping parameter if necessary
   if it == 0;
      Lambda = full(max(diag(Phi)));
   end
   
   %Recompute gradient for regularised problem
   Cont   = 0;   %Initialise consecutive "near" singular counter
   FitOld = FitStory(end);
   DoIt   = 1;
   while DoIt & it <= Options.maxiter %Begin the internal loop
      Alpha = 1;
      LmSt(end+1,1:2) = [Lambda,length(FitStory)];                 %Update Lambda History
      Phi             = Phi + Lambda * speye(sum(~ThetaFix)); %Compute left hand side of the normal equations
      it              = it + 1;                               
      
      %Solve the system of linear equations
      switch lower(Options.normeqsolver)
      case 'pcg' %Use preconditioned conjugate gradients factorisation to solve the normal equations
         if strcmp(Options.pcgprecond,'norm')
            Options.pcgprecond = diag(diag(Phi));
         end
         [delta,flag] = pcg(Phi,Gr,Options.pcgtol,Options.pcgmaxiter,Options.pcgprecond);
         Do = 1;
         if ~any([0 1] == flag)
            Do = 0;
         end
         
      case 'cholesky' %Use cholesky factorisation to solve the normal equations
         try
            Phi   = chol(Phi);
            delta = Phi\(Phi'\Gr); %Update calculated by back substitution
            Do    = 1; 
         catch
            Do = 0; %If it is not possible to perform the Cholesky decomposition -> the Hessian is not positive definite
            %The computed step might not be a descending direction, hence the step is rejected and a new (larger) 
            %Lambda is applied.
         end
         
      case 'ldivide'  %Use cholesky if positive definite LU otherwise
         lastwarn(''); % Reset the warning message record
         warning off
         delta = Phi \ Gr; %Regression step
         warning on
         LW = lastwarn;
         if ~isempty(LW) & strcmpi(LW(1:39),'Matrix is singular to working precision.') %The matrix is singular
            Do    = 0;
            Cont  = Cont + 1;   %Update the consecutive singular counter
         else
            Cont  = 1; %Reinitialise the consecutive singular counter
            Do    = 1; %Accept the update
         end
         
      end
      
      if Do %If the matrix is non-singular or positive definite in the case of "Cholesky-based" update strategy
         %Calculate the linear decrease of the loss function
         LinDecrease = delta(:)'*(Lambda * delta + Gr); %the plus is just for the fact that D = -J
         NDelta      = norm(delta);
         if sign(LinDecrease) == -1
            delta = - delta;
            LinDecrease = delta(:)'*(Lambda * delta + Gr);
         end
            
         %Reshape update vector in the corresponding update matrices for A, B and C
         DD              = zeros(length(ThetaFix),1);
         DD(~ThetaFix)   = delta;
         if strcmpi(Options.separate,'on')
            
            [UpDate{2:ndims(X)}] = fac2let(DD,DimX(2:end),F);
            Z   = UpDate{end} + Factors{end};
            ZtZ = Z'*Z;
            for i = ndims(X)-1:-1:2
               Z   = kr(Z,UpDate{i} + Factors{i});
               Ztp = UpDate{i} + Factors{i};
               ZtZ = ZtZ .* (Ztp'*Ztp);
            end
            Factor    = nshape(X,1) * Z * pinv(ZtZ);
            UpDate{1} = Factor - FactorsOld{1};
            
         else
            [UpDate{1:ndims(X)}] = fac2let(DD,DimX,F);
         end
         
         %Update fit and save old
         fit         = PFLoss(X,Present,1,Factors{:},UpDate{:});
         if strcmpi(Options.linesearch,'iter') %Always to line - search
            [Alpha,fit] = GNLineSearch(X,Present,Alpha,Factors{:},UpDate{:});
         elseif strcmpi(Options.linesearch,'diverg')  % Do line search only if divergence occurs
            if FitOld < fit;
               [Alpha,fit] = GNLineSearch(X,Present,Alpha,Factors{:},UpDate{:});
            end
         end
         if Alpha > 1e-7 %Accept step if the step length is greater than 1e-7
            AcceptStep = 1;
            LambdaOld  = Lambda; 
            %Update the damping parameter
            Rho       = (FitOld - fit) / LinDecrease; %Gain Ratio
            switch lower(Options.lambdaupdate)
            case 'alpha'
               %Use matlab optimisation toolbox update
               if  Rho < Beta
                  LambdaUpdate = 1 + (fit - FitOld + LinDecrease)/(Lambda * Alpha);
               else   
                  LambdaUpdate = 1 / max((1 + Alpha),3);
               end
               
            case 'levmar'
               %Use standard Levenberg - Marquadt update scheme
               if Rho <= 0
                  AcceptStep = 0;
               elseif Rho < 0.25
                  LambdaUpdate = Options.lambdaudpar(1);
               elseif Rho > 0.75
                  LambdaUpdate = Options.lambdaudpar(2);
               else
                  LambdaUpdate = 1;
               end
               
            case 'hbn'
               if Rho < 0
                  AcceptStep = 0;
                  Nu         = Nu * 2;
               else
                  LambdaUpdate = max(Options.lambdaudpar(2),1 - (2 * Rho - 1)^3);
                  Nu           = Options.lambdaudpar(1); 
               end
            end
            if AcceptStep
               for i = 1:length(Factors)
                  Factors{i} = Factors{i} + Alpha * UpDate{i};
               end
               DoIt = 0; %Exit inner loop
            else
               LambdaUpdate = Nu;
            end
            LambdaOld = Lambda;
            Lambda    = Lambda * LambdaUpdate; %Update damping parameter
         else %Reject step because the step-length is too short
            LambdaOld = Lambda;
            switch lower(Options.lambdaupdate)
            case 'hbn'
               Nu = Nu * 2;
            end
            Lambda = Lambda * Nu; %Increase Lambda in order to take a shorter step
         end
      else
         %The Hessian approximation is "nearly" singular or not positive definite (for the "Cholesky-based" scheme)
         LambdaOld = Lambda;
         switch lower(Options.lambdaupdate)
         case 'hbn'
            Nu = Nu * 2;
         end
         Lambda = Lambda * Nu;
      end
   end %End of the inner loop
   
   FitStory(end+1) = fit;
   % Show some diagnostics
   if ~rem(it,Options.display)
      AlStr  = num2str(Alpha);
      AlStr  = [AlStr,char(32*ones(1,8-length(AlStr)))];
      FitStr = sprintf('%12.10f',FitStory(end));
      FitStr = [char(32*ones(1,22-length(FitStr))),FitStr];
      ItStr  = num2str(it);
      ItStr  = [char(32*ones(1,length(num2str(Options.maxiter))-length(ItStr))),ItStr];
      VarStr  = sprintf('%2.4f',100*(1-FitStory(end)/SSX));
      VarStr  = [char(32*ones(1,7-length(VarStr))),VarStr];
      RatStr = sprintf('%2.4f',Rho);
      RatStr  = [char(32*ones(1,7-length(RatStr))),RatStr];
      LamStr = sprintf('%12.4f',LambdaOld);
      LamStr  = [char(32*ones(1,17-length(LamStr))),LamStr];
      LinStr = sprintf('%12.6f',LinDecrease);
      LinStr  = [char(32*ones(1,19-length(LinStr))),LinStr];
      GrStr = sprintf('%8.4f',norm(Gr,'inf'));
      GrStr  = [char(32*ones(1,19-length(GrStr))),GrStr];
      fprintf([' ',AlStr,'  ',FitStr,'  ',ItStr,'  ',VarStr,'  ',RatStr,'  ',LamStr,'   ',LinStr]);
      fprintf('\n');
   end
   %Check convergence
   ConvCond(1,1) = Options.relfitconvcrit >= abs(FitStory(end-1)-FitStory(end))/(FitStory(end) + eps); %Relative fit decrease
   ConvCond(2,1) = Options.parconvcrit    >= NDelta/norm(Theta);                                       %Relative change in the parameters
   ConvCond(3,1) = Options.fitconvcrit    >= FitStory(end) / fit0;                                     %Absolute fit change
   ConvCond(4,1) = Options.gradconvcrit   >= norm(Gr,'inf');                                           %Gradient "equal" to 0
   ConvCond(5,1) = Cont                   >  5;                                                        %Matrix "nearly" singular in more than 5 consecutive iterations 
   ConvCond(6,1) = it                     == Options.maxiter;                                          %Max. number of iterations
   if any(ConvCond)
      conv = 1;
   end %Converged
end %end of the outer loop

%Calculate the Jacobian Singular values and store the number of operations (will be removed for version 6.x)
JacSV = [];
if strcmpi(Options.jacsv,'on')
   JacSV = svd(full(JtJ + Q));
end

%Scale the factors according to the common convention
[Factors{1:end}] = scale_factors(1,Factors{:});
%Sort the factors according to their norm (in decresing order)
[nil,Seq] = sort(-sum(Factors{1}.^2));
for i = 1:ndims(X)
   Factors{i} = Factors{i}(:,Seq);
end

if strcmpi(Options.diagnostics,'on')  %Show the damping parameter and the fit history
   plotyy(1:size(LmSt,1),LmSt(:,1),1:size(LmSt,1),FitStory(LmSt(:,2)),@semilogy),title('Lambda history')
   drawnow
   ConvCrit = {'Relative fit decrease','Parameters'' update','Loss function value of less than machine precision',...
         'Gradient equal to zero','Phi found singular more than 5 consecutive iterations','Max number of iterations reached'};
   disp(' ')
   disp([' The GN algorithm has converged after ',num2str(it),' iterations'])
   disp(['Met convergence criterion: ',ConvCrit{min(find(ConvCond))}])
   disp(' ')
end

%-------------------------------------------------------------------------------------------------------------------------

function [varargout]=fac2let(Factors,DimX,Fac);

Wp=Fac;

if length(Wp)==1
   Wp=ones(1,length(DimX))*Wp;
end

if length(Factors)<sum(DimX.*Wp)
   error(' Too many components extracted')
elseif length(Factors)~=sum(DimX.*Wp)
   disp([' Inconsistency in input parameters (maybe wrong number of components are extracted)'])
end

if nargout~=length(DimX)
   error('The number of loading matrices do not correspond to the number of dimensions of the array')
end

Dims = length(DimX);
for i = 1:Dims
   St = sum(DimX([1:i-1]))*Fac+1;
   En = sum(DimX([1:i])*Fac);
   varargout{i} = reshape(Factors(St:En),DimX(i),Fac);
end