function r = EpitomeReconstruct(x, eMean, eVar, patchSize, varargin) %EPITOMERECONSTRUCT Reconstructs array data from an epitome. % R = EpitomeReconstruct(x, eMean, eVar, patchSize) reconstructs x % from its epitome, eMean and eVar, using patches of size "patchSize". % % x is array data with dimension no greater than 3, eg. videos, images, % audio, etc. The array can however have an additional trailing % dimension that is assumed to be independent, such as colour. % % EpitomeReconstruct(..., 'patchPeriod', T) reconstructs only patches % from x on average [patchSize .* T] distance apart. Default value of T % is 0.25. A value of 0 means that every overlapping patch is used. % % EpitomeReconstruct(..., 'verbose', V) shows progress information if % V is true. Default value of V is true. % % EpitomeReconstruct(..., 'grayScale', G) identifies the data x as % being gray-scale, meaning that the trailing dimension is a singleton. % This argument disambiguates a 2D colour image from a 3D gray-scale % video as they both have 3 non-singleton dimensions, i.e. % length(size(x)) is 3 in both cases. Default value of G is false. % % See also LearnEpitome % Reference: % % 1. V. Cheung, B. J. Frey, and N. Jojic. Video epitomes. In Proc. % IEEE Conf. Computer Vision and Pattern Recognition (CVPR), 2005. % % 2. N. Jojic, B. J. Frey, and A. Kannan. Epitomic analysis of appearance % and shape. In Proc. IEEE Conf. Computer Vision (ICCV), 2003. % Copyright (C) 2005 Vincent Cheung (vincent@psi.toronto.edu, http://www.psi.toronto.edu/~vincent/) % % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 2 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % $Revision: 0.9 $ $Date: Sept. 30, 2005 $ if(nargin < 4) error('MATLAB:LearnEpitome:NotEnoughInputs', 'Requires at least 4 inputs.') end % default values for optional arguments patchPeriod = 0.25; verbose = true; grayScale = false; % parse the variable number of arguments i = 1; while(i < length(varargin)) if(isstr(varargin{i})) if(strcmp(upper(varargin{i}), 'PATCHPERIOD')) if(isnumeric(varargin{i+1})) patchPeriod = varargin{i+1}; i = i + 1; end elseif(strcmp(upper(varargin{i}), 'VERBOSE')) if(islogical(varargin{i+1})) verbose = varargin{i+1}; i = i + 1; end elseif(strcmp(upper(varargin{i}), 'GRAYSCALE')) if(islogical(varargin{i+1})) grayScale = varargin{i+1}; i = i + 1; end end end i = i + 1; end dataNumDim = length(size(x)) - ~grayScale; eNumDim = length(size(eMean)) - ~grayScale; if(eNumDim > 3 || dataNumDim > 3) error('MATLAB:LearnEpitome:TooManyDimensions', 'The dimension of the data cannot exceed 3.'); end % permute the dimensions of 2D and 1D data to simulate 3D data if(dataNumDim == 2) x = permute(x, [4 1 2 3]); end if(dataNumDim == 1) x = permute(x, [3 4 1 2]); end if(eNumDim == 2) eMean = permute(eMean, [4 1 2 3]); eVar = permute(eVar, [4 1 2 3]); end if(eNumDim == 1) eMean = permute(eMean, [3 4 1 2]); eVar = permute(eVar, [3 4 1 2]); end % pad the patch size until they describe 3D data while(length(patchSize) < 3) patchSize = [1 patchSize]; end % pad only if the patchPeriod is of length 2 if(length(patchPeriod) == 2) patchPeriod = [1 patchPeriod]; end % numerical precision tolerance tol = 1e-10; xSize = size(x); if(grayScale) xSize = [xSize 1]; end eSize = size(eMean); % last possible location of patches in x xEndIdx = [xSize(1:end-1)] - patchSize + 1; % the distance between sampled patches patchDisp = max(floor(patchSize .* patchPeriod), 1); % patches can wiggle from their initial position up to half of the patch % displacement in each direction patchWiggle = floor(patchDisp/2); % set-up a grid of patch locations leftOver = mod(xEndIdx-1, patchDisp); leftOver(leftOver == 0) = patchDisp(leftOver == 0); % the first patch location not on an edge startIdx = floor((leftOver + patchDisp)/2) + 1; % the grid tPatchIdx = [1, startIdx(1):patchDisp(1):xEndIdx(1)-1, xEndIdx(1)]; rPatchIdx = [1, startIdx(2):patchDisp(2):xEndIdx(2)-1, xEndIdx(2)]; cPatchIdx = [1, startIdx(3):patchDisp(3):xEndIdx(3)-1, xEndIdx(3)]; patchFixedIdx = zeros(length(tPatchIdx) * length(rPatchIdx) * length(cPatchIdx), 3); count = 1; % set-up the grid, where the patch locations are the rows in the % patchFixedIdx matrix for t = tPatchIdx for r = rPatchIdx for c = cPatchIdx patchFixedIdx(count, :) = [t r c]; count = count + 1; end end end % cumulative sum matrices eeCumSum = zeros(eSize(1:3)+patchSize); % the size of the FFTs FFTSize = eSize(1:3) + patchSize - 1; % set the FFT optimization strategy fftw('planner','exhaustive'); % the all ones FFT (for summing log(eVar)) onesFFT = fft(fft(fft(ones(patchSize), FFTSize(1), 1), FFTSize(2), 2), FFTSize(3), 3); % subscripts to be used to obtain the valid areas of the convolution % note that the epitome is circularly extended by the size of the patch % during convolution convSubs = cell(3, 1); for i = 1 : length(convSubs) convSubs{i} = patchSize(i) : eSize(i) + patchSize(i) - 1; end minDist = zeros(xSize(1:end-1)); minIdx = zeros(xSize(1:end-1)); % temporary matrices for collecting sufficient statistics sumQ = zeros(xSize(1:end-1)); sumQX = zeros(xSize); % the training patches patchIdx = patchFixedIdx; % allow the patches not on a "face" to move from their initial position, % but ensure that the patches fall between 1 and xEndIdx tempIdx = patchIdx(:, 1) ~= 1 & patchIdx(:, 1) ~= xEndIdx(1); patchIdx(tempIdx, 1) = min(max(patchIdx(tempIdx, 1) + floor(rand(sum(tempIdx), 1) * (patchWiggle(1)*2 + 1)) - patchWiggle(1), 1), xEndIdx(1)); tempIdx = patchIdx(:, 2) ~= 1 & patchIdx(:, 2) ~= xEndIdx(2); patchIdx(tempIdx, 2) = min(max(patchIdx(tempIdx, 2) + floor(rand(sum(tempIdx), 1) * (patchWiggle(2)*2 + 1)) - patchWiggle(2), 1), xEndIdx(2)); tempIdx = patchIdx(:, 3) ~= 1 & patchIdx(:, 3) ~= xEndIdx(3); patchIdx(tempIdx, 3) = min(max(patchIdx(tempIdx, 3) + floor(rand(sum(tempIdx), 1) * (patchWiggle(3)*2 + 1)) - patchWiggle(3), 1), xEndIdx(3)); % circularly wrap the epitome and take its cumulative sum eeCumSum(2:end, 2:end, 2:end) = cumsum(cumsum(cumsum(sum(eMean([1:end 1:patchSize(1)-1], [1:end 1:patchSize(2)-1], [1:end 1:patchSize(3)-1], :).^2 ./ eVar([1:end 1:patchSize(1)-1], [1:end 1:patchSize(2)-1], [1:end 1:patchSize(3)-1], :), 4), 1), 2), 3); eMeanOverVar = eMean([1:end 1:patchSize(1)-1], [1:end 1:patchSize(2)-1], [1:end 1:patchSize(3)-1], :) ./ eVar([1:end 1:patchSize(1)-1], [1:end 1:patchSize(2)-1], [1:end 1:patchSize(3)-1], :); eMeanOverVarFFT = fft(fft(fft(eMeanOverVar, FFTSize(1), 1), FFTSize(2), 2), FFTSize(3), 3); eInvVarFFT = fft(fft(fft(1 ./ eVar([1:end 1:patchSize(1)-1], [1:end 1:patchSize(2)-1], [1:end 1:patchSize(3)-1], :), FFTSize(1), 1), FFTSize(2), 2), FFTSize(3), 3); % compute the sum of the log(eVar) for each patch in the epitome eLogVarFFT = fft(fft(fft(sum(log(eVar([1:end 1:patchSize(1)-1], [1:end 1:patchSize(2)-1], [1:end 1:patchSize(3)-1], :)), 4), FFTSize(1), 1), FFTSize(2), 2), FFTSize(3), 3); % use symmetric with the last ifft to guarantee that the result is real eLogVarFullConv = real(ifft(ifft(ifft(eLogVarFFT .* onesFFT, [], 3), [], 2), [], 1)); eLogVarSum = eLogVarFullConv(convSubs{:}); % clear the matrices used for collecting sufficient statistics sumQ(:) = 0; sumQX(:) = 0; tic % for each training case for i = 1 : size(patchIdx, 1) % display progress and estimation of time to completion if(verbose && mod(i-1, ceil(length(patchIdx)/10)) == 0) disp([num2str(round((i-1)/length(patchIdx)*100)) '% Complete']); if(i > 1) disp(['Time Remaining: ', num2str(toc * (1 / ((i-1)/length(patchIdx)) - 1)) ' seconds']); end disp(' '); end time = patchIdx(i, 1); row = patchIdx(i, 2); col = patchIdx(i, 3); % ============================ % E-Step % ============================ xPatch = x(time:time+patchSize(1)-1, row:row+patchSize(2)-1, col:col+patchSize(3)-1, :); % the sum of eMean.^2 ./ eVar for each patch in the epitome eeSum = eeCumSum(patchSize(1)+1:end, patchSize(2)+1:end, patchSize(3)+1:end) - eeCumSum(patchSize(1)+1:end, patchSize(2)+1:end, 1:end-patchSize(3)) - eeCumSum(patchSize(1)+1:end, 1:end-patchSize(2), patchSize(3)+1:end) + eeCumSum(patchSize(1)+1:end, 1:end-patchSize(2), 1:end-patchSize(3)) - (eeCumSum(1:end-patchSize(1), patchSize(2)+1:end, patchSize(3)+1:end) - eeCumSum(1:end-patchSize(1), patchSize(2)+1:end, 1:end-patchSize(3)) - eeCumSum(1:end-patchSize(1), 1:end-patchSize(2), patchSize(3)+1:end) + eeCumSum(1:end-patchSize(1), 1:end-patchSize(2), 1:end-patchSize(3))); % compute correlations using convolutions via FFT % flip x when computing the FFT so that correlations are performed xPatchFFT = fft(fft(fft(xPatch(end:-1:1, end:-1:1, end:-1:1, :), FFTSize(1), 1), FFTSize(2), 2), FFTSize(3), 3); xxPatchFFT = fft(fft(fft(xPatch(end:-1:1, end:-1:1, end:-1:1, :).^2, FFTSize(1), 1), FFTSize(2), 2), FFTSize(3), 3); % xeFullConv = sum(real(ifft(ifft(ifft(eMeanOverVarFFT .* xPatchFFT, [], 1), [], 2), [], 3)), 4); % xxFullConv = sum(real(ifft(ifft(ifft(eInvVarFFT .* xxPatchFFT, [], 1), [], 2), [], 3)), 4); xeFFT = sum(eMeanOverVarFFT .* xPatchFFT, 4); xxFFT = sum(eInvVarFFT .* xxPatchFFT, 4); xeFullConv = real(ifft(ifft(ifft(xeFFT, [], 3), [], 2), [], 1)); xxFullConv = real(ifft(ifft(ifft(xxFFT, [], 3), [], 2), [], 1)); % only look at the part of the convolution that does not assume % zero-padded arrays. xeSum = xeFullConv(convSubs{:}); xxSum = xxFullConv(convSubs{:}); % the patch distances currDist = eeSum - 2*xeSum + xxSum + eLogVarSum; % ============================ % M-Step % ============================ % find the minimum distance and the corresponding index [minDist, tempIdx] = min(currDist(:)); [t, r, c] = ind2sub(size(currDist), tempIdx); % collect sufficient statistics, taking into consideration that the % epitome wraps around xPatchIdx = {time:time+patchSize(1)-1, row:row+patchSize(2)-1, col:col+patchSize(3)-1}; eWrapIdx = {mod(t-1:t+patchSize(1)-2, eSize(1)) + 1, mod(r-1:r+patchSize(2)-2, eSize(2)) + 1, mod(c-1:c+patchSize(3)-2, eSize(3)) + 1}; sumQ(xPatchIdx{:}) = sumQ(xPatchIdx{:}) + 1; sumQX(xPatchIdx{:}, :) = sumQX(xPatchIdx{:}, :) + eMean(eWrapIdx{:}, :); end % avoid divide by zero zeroIdx = sumQ(:, :, :, ones(size(x, 4), 1)) == 0; sumQX(zeroIdx) = x(zeroIdx); sumQ(sumQ == 0) = 1; % compute the reconstruction r = sumQX ./ sumQ(:, :, :, ones(size(x, 4), 1)); if(dataNumDim == 2) r = permute(r, [2 3 4 1]); end if(dataNumDim == 1) r = permute(r, [3 4 1 2]); end