#include <superpose.h>
#include <math.h>
#include <vector>
#include <Vector3D.h>
#include <Matrix3D.h>

#ifndef M_PI
# define M_PI		3.14159265358979323846	/* pi */
#endif

// #define DEBUG_SUPERPOSE

/**
 * Passes the first three elements of ary, widened to double, to
 * v.x(), v.y() and v.z() in that order.
 *
 * @param v    vector receiving the three components
 * @param ary  array that must provide at least three readable floats
 */
void
arrayToVec3D(Vector3D& v, float* ary)
{
  const float* src = ary;
  v.x(static_cast<double>(src[0]));
  v.y(static_cast<double>(src[1]));
  v.z(static_cast<double>(src[2]));
}

/**
 * Stores the values returned by v.x(), v.y() and v.z() into ary[0..2],
 * narrowing each one to float.
 *
 * @param v    vector whose components are read
 * @param ary  destination that must provide room for three floats
 */
void
vec3DToArray(const Vector3D& v, float* ary)
{
  float* dst = ary;
  dst[0] = static_cast<float>(v.x());
  dst[1] = static_cast<float>(v.y());
  dst[2] = static_cast<float>(v.z());
}


/**
 * Writes the 3x3 identity matrix into mat.
 */
static void
superposeSetIdentity(float mat[3][3])
{
  for (int row = 0; row < 3; ++row)
    for (int col = 0; col < 3; ++col)
      mat[row][col] = (row == col) ? 1.0f : 0.0f;
}

/**
 * Returns the angle that maximises cos(angle)*d2 + sin(angle)*d.
 *
 * d is the off-diagonal difference and d2 the diagonal sum of the
 * correlation matrix for the plane being rotated.
 */
static float
superposePlaneAngle(float d, float d2)
{
  static const double PI = 3.14159265358979323846;
  const float halfPi = static_cast<float>(PI / 2.0);

  float angle;
  if (d == 0.0) {
    angle = 0.0;
  }
  else if (d2 == 0.0) {
    // The division d/d2 is undefined here, but the optimum is well defined:
    // a quarter turn in the direction given by the sign of d. Returning 0
    // in this case made the iteration stop at a non-optimal rotation.
    angle = (d > 0.0) ? halfPi : -halfPi;
  }
  else {
    angle = atan(d / d2);
  }
  // atan() only covers half of the circle; pick the branch that maximises
  // (rather than minimises) the objective.
  if (cos(angle) * d2 + sin(angle) * d < 0.0) angle += PI;
  return angle;
}

/**
 * Performs one elementary rotation in the (p,q) coordinate plane.
 *
 * Builds the rotation r for the optimal angle of that plane, then updates
 * u <- u * transpose(r) and s <- r * s. Returns the angle that was applied.
 */
static float
superposeRotateInPlane(float u[3][3], float s[3][3], int p, int q)
{
  const float angle = superposePlaneAngle(u[q][p] - u[p][q], u[p][p] + u[q][q]);

  float r[3][3];
  superposeSetIdentity(r);
  r[p][p] = r[q][q] = cos(angle);
  r[q][p] = sin(angle);
  r[p][q] = -r[q][p];

  float prod[3][3];
  int i, j, k;

  // u <- u * transpose(r)
  for (i = 0; i < 3; i++)
    for (j = 0; j < 3; j++) {
      prod[i][j] = 0.;
      for (k = 0; k < 3; k++)
        prod[i][j] += u[i][k] * r[j][k];
    }
  for (i = 0; i < 3; i++)
    for (j = 0; j < 3; j++)
      u[i][j] = prod[i][j];

  // s <- r * s
  for (i = 0; i < 3; i++)
    for (j = 0; j < 3; j++) {
      prod[i][j] = 0.;
      for (k = 0; k < 3; k++)
        prod[i][j] += r[i][k] * s[k][j];
    }
  for (i = 0; i < 3; i++)
    for (j = 0; j < 3; j++)
      s[i][j] = prod[i][j];

  return angle;
}

/**
 * Central superposition method, given by Peter Rotkiewitc.
 *
 * Iteratively searches for the rotation that best maps the centred points of
 * coords2 onto the centred points of coords1. Each iteration applies three
 * elementary rotations (one per coordinate plane); the loop stops once the
 * sum of the absolute angles drops to 0.0001 or below, or after
 * NUMBER_STEPS_MAX extra iterations.
 *
 * Both coordinate sets are shifted to their centroids while the function
 * works and shifted back before it returns, so apart from float rounding
 * they are left unchanged.
 *
 * @param coords1  npoints rows of three floats (reference set)
 * @param coords2  npoints rows of three floats (set to be rotated)
 * @param npoints  number of points in each set
 * @param rot      if non-null, 3 rows of 3 floats receiving the rotation R
 *                 such that R * (coords2[i] - mc2) approximates
 *                 coords1[i] - mc1
 * @param mc1      if non-null, receives the centroid of coords1
 * @param mc2      if non-null, receives the centroid of coords2
 * @return root mean square deviation after superposition. If npoints < 3,
 *         returns 0.0 immediately and leaves rot, mc1 and mc2 untouched.
 */
float superimpose(float **coords1, float **coords2, int npoints, float **rot, float *mc1, float *mc2)
{
#ifdef DEBUG_SUPERPOSE
  cout << "Starting core superimpose function" << endl;
#endif
  if (npoints < 3) {
    return 0.0; // problem, mc1 mc2, rot not specified
  }
  const unsigned int NUMBER_STEPS_MAX = 1000; // maximum number of optimization steps
  int i, j, n;

  // Centroids of both point sets.
  float cx1, cy1, cz1, cx2, cy2, cz2;
  cx1 = cy1 = cz1 = cx2 = cy2 = cz2 = 0.;
  for (i = 0; i < npoints; i++) {
    cx1 += coords1[i][0];
    cy1 += coords1[i][1];
    cz1 += coords1[i][2];
    cx2 += coords2[i][0];
    cy2 += coords2[i][1];
    cz2 += coords2[i][2];
  }
  const float count = static_cast<float>(npoints);
  cx1 /= count;
  cy1 /= count;
  cz1 /= count;
  cx2 /= count;
  cy2 /= count;
  cz2 /= count;

  // Work in centred coordinates (undone again further down).
  for (i = 0; i < npoints; i++) {
    coords1[i][0] -= cx1;
    coords1[i][1] -= cy1;
    coords1[i][2] -= cz1;
    coords2[i][0] -= cx2;
    coords2[i][1] -= cy2;
    coords2[i][2] -= cz2;
  }

  // mat_s accumulates the total rotation; mat_u is the correlation matrix
  // mat_u[i][j] = sum over points of coords1[.][i] * coords2[.][j].
  float mat_s[3][3], mat_u[3][3];
  superposeSetIdentity(mat_s);
  for (i = 0; i < 3; i++)
    for (j = 0; j < 3; j++)
      mat_u[i][j] = 0.;
  for (n = 0; n < npoints; n++)
    for (i = 0; i < 3; i++)
      for (j = 0; j < 3; j++)
        mat_u[i][j] += coords1[n][i] * coords2[n][j];

  float val;
  unsigned int stepCounter = 0;
  do {
    const float alpha = superposeRotateInPlane(mat_u, mat_s, 1, 2);
    const float beta  = superposeRotateInPlane(mat_u, mat_s, 2, 0);
    const float gamma = superposeRotateInPlane(mat_u, mat_s, 0, 1);
    val = fabs(alpha) + fabs(beta) + fabs(gamma);
  } while ((val > 0.0001) && (stepCounter++ < NUMBER_STEPS_MAX)); // added additional break when too many steps (Eckart Bindewald 2004)

  // Sum of squared distances between coords1 and the rotated coords2.
  val = 0.;
  for (i = 0; i < npoints; i++) {
    const float x2 = coords2[i][0];
    const float y2 = coords2[i][1];
    const float z2 = coords2[i][2];
    const float rx = x2 * mat_s[0][0] + y2 * mat_s[0][1] + z2 * mat_s[0][2];
    const float ry = x2 * mat_s[1][0] + y2 * mat_s[1][1] + z2 * mat_s[1][2];
    const float rz = x2 * mat_s[2][0] + y2 * mat_s[2][1] + z2 * mat_s[2][2];
    const float dx = coords1[i][0] - rx;
    const float dy = coords1[i][1] - ry;
    const float dz = coords1[i][2] - rz;
    val += dx * dx + dy * dy + dz * dz;
  }

  // Move both point sets back to where the caller had them.
  for (i = 0; i < npoints; i++) {
    coords1[i][0] += cx1;
    coords1[i][1] += cy1;
    coords1[i][2] += cz1;
    coords2[i][0] += cx2;
    coords2[i][1] += cy2;
    coords2[i][2] += cz2;
  }

  if (rot) {
    for (i = 0; i < 3; i++)
      for (j = 0; j < 3; j++)
        rot[i][j] = mat_s[i][j];
  }

  if (mc1) {
    mc1[0] = cx1;
    mc1[1] = cy1;
    mc1[2] = cz1;
  }

  if (mc2) {
    mc2[0] = cx2;
    mc2[1] = cy2;
    mc2[2] = cz2;
  }
#ifdef DEBUG_SUPERPOSE
  cout << "Ending core superimpose function" << endl;
#endif
  return sqrt(val / count);
}

double
superimpose(const Vec<Vector3D>& v1, Vec<Vector3D>& v2, 
	    Matrix3D& m, Vector3D& c1, Vector3D& c2)
{
  PRECOND(v1.size() > 2);
  PRECOND(v1.size() == v2.size());
#ifdef DEBUG_SUPERPOSE
  cout << "Starting wrapper superimpose function" << endl;
#endif
  unsigned int n = v1.size();
  float * v1Float[1000]; //  = * float[n];
  float * v2Float[1000]; //  = new * float[n];  
  ERROR_IF(n >= 1000, "Too large number of points!");
  for (unsigned int i = 0; i < n; ++i) {
    v1Float[i] = new float[3];
    v2Float[i] = new float[3];
  }
  float * rot[3];
  float row0[3];
  float row1[3];
  float row2[3];
  rot[0] = row0;
  rot[1] = row1;
  rot[2] = row2;
  float mc1[3];
  float mc2[3];
  for (unsigned int i = 0; i < 3; ++i) {
    mc1[i] = 0.0;
    mc2[i] = 0.0;
    for (unsigned int j = 0; j < 3; ++j) {
      rot[i][j] = 0.0;
    }
  }
  for (unsigned int i = 0; i < v1.size(); ++i) {
    vec3DToArray(v1[i], v1Float[i]);
    vec3DToArray(v2[i], v2Float[i]);
  }
  float resultFloat= superimpose(v1Float, v2Float, v1.size(), rot, mc1, mc2);
  // copy to array
  double xzerocheck = v2Float[0][0];
  if ((v2[0].x() - xzerocheck) < 0.01) {
    cout << "First x coordinate has not changed!!!" << v2[0].x() << endl;
  }
  for (unsigned int i = 0; i < n; ++i) {
    v2[i].x(v2Float[i][0]);
    v2[i].y(v2Float[i][1]);
    v2[i].z(v2Float[i][2]);
  }
  arrayToVec3D(c1, mc1);
  arrayToVec3D(c2, mc2);

  m = Matrix3D(rot[0][0], rot[0][1], rot[0][2],
	       rot[1][0], rot[1][1], rot[1][2],
	       rot[2][0], rot[2][1], rot[2][2]);

  // release memory:
  for (unsigned int i = 0; i < n; ++i) {
    delete[] v1Float[i];
    delete[] v2Float[i];
  }
//    delete[] v1Float;
//    delete[] v2Float;

#ifdef DEBUG_SUPERPOSE
  cout << "Ending wrapper superimpose function" << endl;
#endif

  return static_cast<double>(resultFloat);
}

