function [G,target] = SAOL(Niter,Y,l,num,G,alPar,thres)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Implements the AOL Gradient Descent algorithm using Niter iterations
% starting from an operator G with measurement data Y, for which we have
% that Op*y is sparse. We want to find Op.
% In addition, we use a replacement strategy for the operator G: if two
% rows have an overlap (absolute inner product) larger than thres, one of
% the rows is reinitialized with a random unit-norm row.
%
% Input:
%   -Niter  ... Number of iterations
%   -Y      ... Signal data (d x N, one signal per column)
%   -l      ... Cosparsity (number of entries of G*y set to zero per signal)
%   -num    ... Number of sampled signals in each step (capped at N)
%   -G      ... Starting operator (default: random with unit-norm rows,
%               size 2dxd)
%   -alPar  ... stepsize parameter (default: 0.3)
%   -thres  ... replacement threshold (default: 1 = no replacement)
%
% Output:
%   -G      ... the learned operator (rows normalized to unit norm)
%   -target ... the value of the target function for all iterations
%               (1 x Niter)
%
% (c) 03.04.2016 Michael Sandbichler
%
% This software is a free software distributed under the terms of the GNU 
% Public License version 3 (http://www.gnu.org/licenses/gpl.txt). You can 
% redistribute it and/or modify it under the terms of this licence, for 
% personal and non-commercial use and research purpose. 
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% initializations and error catches
[d,N] = size(Y);

if nargin < 4
    error('Too few input arguments.');
end

if nargin < 5
    % default operator: 2d random rows, each scaled to unit norm
    n = 2*d;
    G = randn(n,d);
    G = diag(1./sqrt(diag(G*G')))*G;
else
    [n,d1] = size(G);
    if d1 ~= d
        disp('Input operator G has the wrong size. Choosing a random operator instead.');
        G = randn(n,d);
        G = diag(1./sqrt(diag(G*G')))*G;
    end
end

if nargin < 6
    alPar = 0.3;
end

if nargin < 7
    thres = 1;
end

% never ask for more signals per step than are available
if num > N
    num = N;
end

target = zeros(1,Niter);

% columns of the current sample used to estimate the step size; max(1,.)
% keeps the range valid for very small num (floor(0.95*num) would be 0)
tailIdx = max(1,floor(0.95*num)):num;

%% the algorithm
for iter = 1:Niter

    % draw a sample of size num from the data Y
    sample = randperm(N);
    sample = sample(1:num);
    ySub = Y(:,sample);

    S = G*ySub;

    % find the l smallest entries (in absolute value) in each column
    [~,I] = sort(abs(S),1);

    % X equals S with the l smallest entries of every column set to zero
    X = S;
    for k = 1:num
        X(I(1:l,k),k) = 0;
    end

    % S-X holds exactly the zeroed entries
    Gr = -2*(X-S)*ySub';

    % value of the target function on the current sample: mean over the
    % sampled signals of the squared l smallest entries of G*y
    target(iter) = 1/num*sum(sum((S-X).^2,1));

    yTail = ySub(:,tailIdx);

    for k = 1:n
        % perform the gradient step for each of the rows of G
        grad = Gr(k,:);

        % compute the descent parameter: median of a per-signal ratio over
        % the tail of the sample, scaled by alPar
        aux1 = S(k,tailIdx);
        aux2 = grad*yTail;
        aux3 = abs(2*(aux2.*aux1-aux1.^2*(grad*G(k,:)'))./(aux2.^2-aux1.^2*(grad*grad')));
        al = median(aux3)*alPar;

        G(k,:) = G(k,:) - al*grad; % gradient descent step

        % normalize the row, or redraw it if its norm is too small or not
        % finite (a zero gradient makes al NaN, which ends up here as well)
        rowNorm = norm(G(k,:));
        if rowNorm > 1e-13 && isfinite(rowNorm)
            G(k,:) = G(k,:)/rowNorm;
        else
            G(k,:) = randn(1,d);
            G(k,:) = G(k,:)/norm(G(k,:));
        end
    end

    % replace duplicate rows: as long as some pair of distinct rows
    % overlaps by more than thres, redraw one row of the first such pair
    hit = find((abs(G*G'-eye(n))') > thres, 1);
    while ~isempty(hit)
        dupRow = ceil(hit/n);
        G(dupRow,:) = randn(1,d);
        G(dupRow,:) = G(dupRow,:)/norm(G(dupRow,:));
        disp('Duplicate row replaced');
        hit = find((abs(G*G'-eye(n))') > thres, 1);
    end

end
% close(h);
