#define nreps 1

#include "mex.h"

void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
	int *c1=NULL, *c2=NULL, *s=NULL, K,p,N,i,j,n,klk, goout,goin,gooutb,goinb,goinc,gooutc, jsel,trig,ii,jj,idum;
	mxArray *mxn, *mxs;
	unsigned short int *exs=NULL;
	double *s_=NULL, **S=NULL, *u=NULL, dmax,fbest,w,wstar;

	N = (int)mxGetN(prhs[0]);
	K = p = (int)mxGetScalar(prhs[1]);
	S=mxCalloc(N,sizeof(*S)); S--;
		S[1]=mxGetPr(prhs[0]); S[1]--; for (i=2;i<=N;i++) S[i]=S[i-1]+N;
	plhs[0]=mxCreateNumericMatrix(K,nreps,mxUINT16_CLASS,mxREAL); exs=mxGetData(plhs[0]);

	s=mxCalloc(N,sizeof(s[1])); s--;
	c1=mxCalloc(N,sizeof(c1[1])); c1--;
	c2=mxCalloc(N,sizeof(c2[1])); c2--;
	u=mxCalloc(N,sizeof(u[1])); u--;

	for (klk=1; klk<=nreps; klk++) {
		if (nrhs>2) {/* include initialization */
			s_ = mxGetPr(prhs[2]); s_--;
			for (i=1; i<=N; i++) s[i]=(int)s_[i];
			s_ = NULL;
		} else { /* randomly generate initialiation */
			mxn = mxCreateDoubleScalar(N);
			mexCallMATLAB(1,&mxs, 1,&mxn, "randperm"); s_=mxGetPr(mxs); s_--;
				for (i=1; i<=N; i++) s[i]=(int)s_[i];
				mxDestroyArray(mxs); s_=NULL; mxDestroyArray(mxn);
		}

		fbest = 0.0;
		for (i=1; i<=N; i++) {
			dmax = -9.9e+12;
			for (j=1; j<=p; j++) {
				if (S[i][s[j]]>dmax) {dmax=S[i][s[j]]; jsel=j;}
			}
			fbest=fbest+dmax; c1[i]=s[jsel];
		}
		for (i=1; i<=N; i++) {
			dmax = -9.9e+12;
			for (j=1; j<=p; j++) {
				jj = s[j];
				if (c1[i]==jj) continue;
				if (S[i][jj] > dmax) {dmax=S[i][jj]; jsel=jj;}
			}
			c2[i] = jsel;
		}
		trig = 0;
		while (trig==0) {
			wstar = 0;
			for (goin=p+1; goin<=N; goin++) {
				ii=s[goin]; w=0;
				for (i=1; i<=N; i++) u[i]=0.0;
				for (i=1; i<=N; i++) {
					if (S[i][ii]>S[i][c1[i]]) {
						w = w + S[i][ii] - S[i][c1[i]];
					} else {
						u[c1[i]] = u[c1[i]] + ((S[i][ii]>S[i][c2[i]])?(S[i][ii]):(S[i][c2[i]])) - S[i][c1[i]];
					}
				}
				dmax = -9.9e+12;
				for (j=1; j<=p; j++) {
					jj = s[j];
					if (u[jj]>dmax) {dmax=u[jj]; goout=j;}
				}
				w = w + dmax;
				if (w > wstar) {wstar=w; goinb=goin; gooutb=goout;}
			}
			if (wstar<=0.00001) {trig=1; continue;}
			fbest=fbest+wstar; goinc=s[goinb]; gooutc=s[gooutb];
			idum=s[goinb]; s[goinb]=s[gooutb]; s[gooutb]=idum;
			for (i=1; i<=N; i++) {
				if (c1[i]==gooutc) {
					if (S[i][goinc]>=S[i][c2[i]]) {
						c1[i]=goinc;
					} else {
						c1[i] =	c2[i];
						dmax = -9.9e+12;
						for	(j=1; j<=p; j++) {
							jj = s[j];
							if (c1[i]==jj) continue;
							if (S[i][jj]>dmax) {dmax=S[i][jj]; jsel=jj;}
						}
						c2[i]=jsel;
					}
				} else {
					if (S[i][c1[i]]<S[i][goinc]) {
						c2[i]=c1[i]; c1[i]=goinc;
					} else if (S[i][goinc]>S[i][c2[i]]) {
						c2[i]=goinc;
					} else if (c2[i]==gooutc) {
						dmax = -9.9e+12;
						for	(j=1; j<=p; j++) {
							jj = s[j];
							if (c1[i]==jj) continue;
							if (S[i][jj]>dmax) {
								dmax=S[i][jj]; jsel=jj;
							}
						}
						c2[i]=jsel;
					}
				}
			}
		}
		for (i=1; i<=K; i++) {*exs=s[i]; exs++;}
	}

	/*for (i=1; i<=N; i++) mxFree(++(S[i]));*/ mxFree(++S);
	c1++;mxFree(c1); c2++;mxFree(c2); u++;mxFree(u); s++;mxFree(s);
	return;
}

/*
mex -g vshmex_.c
*/
