// --*- C++ -*------x---------------------------------------------------------
#ifndef __INTERACTION_CLUSTER_ANALYZER__
#define __INTERACTION_CLUSTER_ANALYZER__

#include <Stem.h>
#include <StemTools.h>
#include <StringTools.h>
#include <clusterAlgorithms.h>
#include <vectornumerics.h>
#include <generalNumerics.h>
#include <realnumerics.h>
#include <MAFAlignment.h>
#include <MAFAlignmentTools.h>
#include <MainTools.h>
#include <BirthdayProb.h>
#include <map>
#include <CorrelationFinder.h>

using namespace std;

class InteractionClusterAnalyzer {
  
private:

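  /** Used to solve the "birthday problem": computes the bias towards antidiagonals for pairs of columns that show covariation.
   * NOTE(review): this member is held by value, although an earlier version of this comment described a pointer
   * (chosen because the object allocates a large amount of memory, to avoid problems with limited stack memory) - confirm which is intended. */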
  BirthdayProb birthdays;
  
public:
  
  // Shorthand type names used throughout this class.
  typedef Vec<Stem>::size_type size_type;
  typedef Stem::index_type index_type;
  // Extent of a cluster: a pair of intervals (first / second).
  typedef pair<IntervallInt, IntervallInt> dimension_type;
  typedef MAFAlignment::length_type length_type;
  typedef MAFAlignment::count_hash_type count_hash_type;
  // Maps a string key to a density value (see readDensities / writeDensities).
  typedef map<string, double> double_hash_type;
  typedef CorrelationFinder::result_container result_container;

  // Interaction classes as returned by classifyInteraction and named by convertToInteractionName.
  enum { INTERACTION_UNCLASSIFIED = 0, INTERACTION_OVERLAPPING = 1, INTERACTION_LOCAL = 2, INTERACTION_MEDIUM = 3, 
	 INTERACTION_FAR = 4, INTERACTION_INTERCHROM = 5 };

  // Gap thresholds used by classifyInteraction: gap < LOCAL_MAX is "local", gap < MEDIUM_MAX is "medium", otherwise "far".
  enum { LOCAL_MAX = 200, MEDIUM_MAX = 6000 };

  // NOTE(review): presumably the values accepted by the multiTestMode parameter of analyzeClusters - confirm against the definition.
  enum { NO_CLUSTERING = 0, MTEST_TOTAL_AREA = 1, MTEST_CLUSTER_AREA = 2, MTEST_CLUSTER_AREA_SMALL = 3 };
public:
  
  /** Default constructor; performs no explicit initialization. */
  InteractionClusterAnalyzer() { }

  /** Virtual destructor: the class has virtual methods and may be deleted through a base pointer. */
  virtual ~InteractionClusterAnalyzer() { }
  
  /** Classifies the relation between two closed intervals [r1min, r1max] and [r2min, r2max].
   * @return INTERACTION_OVERLAPPING if the intervals share at least one position. Otherwise the gap
   * between them decides: gap < LOCAL_MAX gives INTERACTION_LOCAL, gap < MEDIUM_MAX gives
   * INTERACTION_MEDIUM, anything larger gives INTERACTION_FAR.
   * The result does not depend on which of the two intervals comes first. */
  int classifyInteraction(index_type r1min, index_type r1max, index_type r2min, index_type r2max) {
    if (! ((r1max < r2min) || (r2max < r1min))) {
      return INTERACTION_OVERLAPPING;
    }
    index_type d1 = r2min - r1max; // gap if interval 1 lies before interval 2 (negative otherwise)
    index_type d2 = r1min - r2max; // gap if interval 2 lies before interval 1 (negative otherwise)
    index_type d = d1;
    if ((d < 0) || ((d2 >= 0) && (d2 < d1))) {
      d = d2; // was "d = d1": left d negative whenever interval 2 preceded interval 1
    }
    ASSERT(d >= 0);
    if (d < LOCAL_MAX) {
      return INTERACTION_LOCAL;
    } else if (d < MEDIUM_MAX) {
      return INTERACTION_MEDIUM;
    }
    return INTERACTION_FAR;
  }

  /** Classifies an interaction into overlapping, local, medium and far */
  int classifyInteraction(const dimension_type& dimensions) {
    return classifyInteraction(dimensions.first.getLower(), dimensions.first.getUpper(),
			       dimensions.second.getLower(), dimensions.second.getUpper());
  }

  /** Converts an interaction classification code (one of the INTERACTION_* constants) to its
   * lower-case name. An unknown code is reported through ERROR; should ERROR return,
   * "unclassified" is used as the fallback name. */
  string convertToInteractionName(int interactionClassification) {
    switch (interactionClassification) {
    case INTERACTION_UNCLASSIFIED: return "unclassified";
    case INTERACTION_OVERLAPPING:  return "overlapping";
    case INTERACTION_LOCAL:        return "local";
    case INTERACTION_MEDIUM:       return "medium";
    case INTERACTION_FAR:          return "far";
    case INTERACTION_INTERCHROM:   return "interchrom";
    default:
      ERROR("Internal error: Unknown interaction classification code!");
    }
    return "unclassified";
  }

  /** Self-test for classifyInteraction; one case per interaction class.
   * The gaps are chosen relative to the thresholds LOCAL_MAX (200) and MEDIUM_MAX (6000). */
  void classifyInteractionTest() {
    ASSERT(classifyInteraction(2,5,3,7) == INTERACTION_OVERLAPPING);
    ASSERT(classifyInteraction(2,5,8,20) == INTERACTION_LOCAL);       // gap 3
    // The former case (50,55,101,141) has a gap of only 46, which is below LOCAL_MAX and
    // therefore "local", so the assertion could never hold. Use a gap inside [LOCAL_MAX, MEDIUM_MAX).
    ASSERT(classifyInteraction(50,55,301,341) == INTERACTION_MEDIUM); // gap 246
    ASSERT(classifyInteraction(2,5,8000,20000) == INTERACTION_FAR);   // gap 7995
  }

  /** Computes the dimensions (a pair of intervals, see dimension_type) of a cluster of stems.
   * The result can be passed to computeClusterArea to obtain an area.
   * Only declared in this header; the definition lives elsewhere. */
  dimension_type computeClusterDimensions(const Vec<Stem>& stems);


  /** How likely is it to see a similar amount of covariation in row-shuffled alignments?
   * Calls estimateClusterShuffleProbability once per cluster and collects the results.
   * @param clusterDimensions dimensions of each cluster; determines the number of results
   * @param clusterStems stems of each cluster, indexed in parallel to clusterDimensions
   * @return one probability per entry of clusterDimensions */
  virtual Vec<double> estimateClusterShuffleProbabilities(const Vec<dimension_type>& clusterDimensions,
                                                     const Vec<Vec<Stem> >& clusterStems,
                                                     const MAFAlignment& maf1,
                                                     const MAFAlignment& maf2,
                                                     bool useInternalCoordinates) {
    Vec<double> probabilities(clusterDimensions.size(), 0.0);
    const Vec<double>::size_type numClusters = probabilities.size();
    for (Vec<double>::size_type k = 0; k < numClusters; ++k) {
      probabilities[k] = estimateClusterShuffleProbability(clusterDimensions[k], clusterStems[k],
                                                           maf1, maf2, useInternalCoordinates);
    }
    return probabilities;
  }

  /** How likely is it to see a similar amount of covariation in row-shuffled alignments?
   * Applies to one cluster.
   * This base-class implementation ignores all arguments and always returns 0.0; the method is
   * virtual, so a derived class can supply an actual estimate. */
  virtual double estimateClusterShuffleProbability(const dimension_type& cluster,
						   const Vec<Stem>& clusterStems,
						   const MAFAlignment& maf1,
						   const MAFAlignment& maf2,
						   bool useInternalCoordinates) {
    return 0.0;
  }
  
  /** Compute the area corresonding to a cluster of stems. Unit: nucleotides squared. A single stem corresponds to area 1 */
  double computeClusterArea(const dimension_type& dimensions, double border) { 
    double dx = dimensions.first.getUpper() - dimensions.first.getLower() + (2*border) + 1; // boundaries are inclusive and zero-based
    double dy = dimensions.second.getUpper() - dimensions.second.getLower() + (2*border) + 1; // boundaries are inclusive and zero-based
    ASSERT(dx > 0);
    ASSERT(dy > 0);
    double area = dx * dy;
    POSTCOND(area >= (4*border*border));
    return area;
  }

  /** Compute the area corresonding to a cluster of stems. Unit: nucleotides squared. A single stem corresponds to area 1 */
  double computeClusterArea(const Vec<Stem>& stems, double border) {
    dimension_type dimensions = computeClusterDimensions(stems);
    return computeClusterArea(dimensions,border);
  }
  
  /** Clusters the given stems by single linkage.
   * Builds the full stem-to-stem distance matrix with StemTools::convertStemEndsToDistanceMatrix
   * and passes it, together with the cutoff, to singleLinkage.
   * @return one vector of stem indices (into stems) per cluster */
  Vec<Vec<unsigned int> > clusterStems(const Vec<Stem>& stems, double cutoff) {
    Vec<Vec<double> > distanceMatrix = StemTools::convertStemEndsToDistanceMatrix(stems);
    return singleLinkage(distanceMatrix, cutoff);
  }

  /** Clusters stems by single linkage, using two cheap one-dimensional pre-clustering passes
   * (first on stem start, then on stem stop) before running the full clustering (clusterStems)
   * on each remaining small subset.
   * @param stems stems to cluster
   * @param cutoff distance cutoff for the final single-linkage clustering
   * @param ignoreSingles if true, clusters consisting of a single stem whose length is not
   *        greater than one are not stored
   * @return one vector of indices into stems per cluster */
  Vec<Vec<unsigned int> > clusterStemsFast(const Vec<Stem>& stems, double cutoff, bool ignoreSingles) {
    REMARK << "Clustering stems (single-linkage, cutoff: " << cutoff << " )" << endl;
    // Run the self-tests of the helpers this method relies on.
    ASSERT(singleLinkage1dTest());
    ASSERT(StemTools::stemDistTest());
    Vec<double> starts(stems.size());
    // The 1D pre-clustering uses a more generous cutoff than the final clustering.
    double cutoff1d = cutoff + 20; // 100; // because of fancy distance measure between stems: consider cutoff1d = cutoff + 100, meaning stems with length greater 100 are really unlikely; in other words: preclustering is on "safe side"
    for (Vec<Stem>::size_type i = 0; i < stems.size(); ++i) {
      starts[i] = static_cast<double>(stems[i].getStart());
    }
    Vec<Vec<unsigned int> > startClusters = singleLinkage1d(starts, cutoff1d); // fast preclustering using only first coordinate
    Vec<Vec<unsigned int> > finalClusters;
    for (Vec<Vec<unsigned int> >::size_type i = 0; i < startClusters.size(); ++i) { // should be much smaller size
      Vec<Stem> subsetStems = getSubset(stems, startClusters[i]);
      Vec<double> stops(subsetStems.size());
      for (Vec<Stem>::size_type j = 0; j < subsetStems.size(); ++j) {
	stops[j] = static_cast<double>(subsetStems[j].getStop());
      }
      Vec<Vec<unsigned int> > stopClusters = singleLinkage1d(stops, cutoff1d); // fast preclustering using only second coordinate
      for (Vec<Vec<unsigned int> >::size_type m = 0; m < stopClusters.size(); ++m) { // should be much smaller size
	Vec<Stem> subsubsetStems = getSubset(subsetStems, stopClusters[m]);
	// Full (distance-matrix based) clustering on the doubly pre-clustered subset.
	Vec<Vec<unsigned int> > subClusters = clusterStems(subsubsetStems, cutoff);
	for (Vec<Vec<unsigned int> >::size_type j = 0; j < subClusters.size(); ++j) {
	  Vec<unsigned int> newCluster;
	  ASSERT(newCluster.size() == 0);
	  for (Vec<Vec<unsigned int> >::size_type k = 0; k < subClusters[j].size(); ++k) {
	    // Translate the index through all three levels:
	    // sub-cluster -> stop cluster -> start cluster -> index into the original stems vector.
	    unsigned int id = startClusters[i][stopClusters[m][subClusters[j][k]]];
	    ERROR_IF(id >= stems.size(), "Internal error lin line 169 while clustering stems!");
	    newCluster.push_back(id); // make sure correct ids are used!
	  }
	  ERROR_IF(newCluster.size() == 0, "Internal error in line 172 while clustering stems!");
	  ERROR_IF(newCluster[0] >= stems.size(), "Internal error in line 173 while clustering stems!");
	  // Keep the cluster unless singles are ignored and it is one stem of length <= 1.
	  if ((!ignoreSingles) || (newCluster.size() > 1) || (stems[newCluster[0]].getLength() > 1)) {
	    finalClusters.push_back(newCluster);
	  }
	}
      }
    } 
    return finalClusters;
  }

  /** Counts stems, with longer stems counted as several stems of length stemLengthMin:
   * each stem contributes (length / stemLengthMin), using integer division.
   * Every stem must be at least stemLengthMin long (checked with ASSERT). */
  size_type countSubStems(const Vec<Stem>& stems, Stem::index_type stemLengthMin) {
    size_type total = 0;
    const Vec<Stem>::size_type numStems = stems.size();
    for (Vec<Stem>::size_type k = 0; k < numStems; ++k) {
      ASSERT((stems[k].getLength() / stemLengthMin) >= 1); // otherwise stem too short
      total += stems[k].getLength() / stemLengthMin;
    }
    ASSERT(total >= stems.size());
    return total;
  }

  /** Generates a vector containing the substems of all given stems.
   * Each stem is expanded with StemTools::expandStem(stem, stemLengthMin, reverseMode); the
   * results are concatenated in input order. */
  Vec<Stem> expandStems(const Vec<Stem>& stems, Stem::index_type stemLengthMin, bool reverseMode) {
    Vec<Stem> allSubStems;
    for (Vec<Stem>::size_type s = 0; s < stems.size(); ++s) {
      const Vec<Stem> pieces = StemTools::expandStem(stems[s], stemLengthMin, reverseMode);
      for (Vec<Stem>::size_type p = 0; p < pieces.size(); ++p) {
        allSubStems.push_back(pieces[p]);
      }
    }
    return allSubStems;
  }

  /** Builds a histogram over the stem invariants (Stem::getInvariant(reverseMode)) of the given stems.
   * The histogram covers the range [minInvariant - cutoff, maxInvariant + cutoff]; bin j stands
   * for invariant value (minInvariant - cutoff + j) and holds the summed length of all stems with
   * that invariant.
   * @param stems stems to histogram
   * @param cutoff number of extra (empty) bins added on each side of the observed invariant range
   * @param reverseMode passed through to Stem::getInvariant
   * @return the histogram; empty if stems is empty */
  Vec<index_type> computeStemInvariantHistogram(const Vec<Stem>& stems, index_type cutoff, bool reverseMode) {
    if (stems.size() == 0) {
      return Vec<index_type>(); // nothing to count; also avoids reading stems[0] below
    }
    index_type invariantMin = stems[0].getInvariant(reverseMode);
    index_type invariantMax = invariantMin;
    for (Vec<Stem>::size_type i = 1; i < stems.size(); ++i) {
      const index_type invariant = stems[i].getInvariant(reverseMode);
      if (invariant < invariantMin) {
        invariantMin = invariant;
      }
      if (invariant > invariantMax) {
        invariantMax = invariant;
      }
    }
    // widen the range by the cutoff on both sides
    invariantMin -= cutoff;
    invariantMax += cutoff;
    index_type numInvariants = invariantMax - invariantMin + 1;
    Vec<index_type> histogram(numInvariants, 0);
    for (Vec<Stem>::size_type i = 0; i < stems.size(); ++i) {
      const index_type bin = stems[i].getInvariant(reverseMode) - invariantMin;
      ASSERT(bin < static_cast<index_type>(histogram.size()));
      histogram[bin] += stems[i].getLength();
    }
    return histogram;
  }

  /** Using binomial theorem, computes p-value, how likely it is
   * to see such a distribution of stems, given the hypothesis
   * that the base pairs are random */
  double computeSparseHistogramBiasP_old(const Vec<index_type>& histogram) {
    index_type  n = 0;
    index_type zeros = 0;
    index_type nonzeros = 0;
    index_type c = histogram.size();
    for (size_type i = 0; i < histogram.size(); ++i) {
      n += histogram[i];
      if (histogram[i] == 0) {
	++zeros;
      } else {
	++nonzeros;
      }
    }
    ASSERT(n > 0);
    if (zeros == 0) {
      cout << "# All stem invariants were found in a cluster. Estimation of p-value is for this case not implemented."
	   << endl;
      return 1.0;
    } 
    double p = 0.0;
    // sum probabilities of this and less likely scenarios:
    for (index_type i = nonzeros; i > 0; --i) {
      p += computeStemInvariantProbBinom(c, n, i);
      // p += exp(logMultinomial(histogram, defProbVec, n));
    }
    POSTCOND(isDefined(p));
    return p;
  }

  /** Using binomial theorem, computes p-value, how likely it is
   * to see such a distribution of stems, given the hypothesis
   * that the base pairs are random */
  double computeSparseHistogramBiasP(const Vec<index_type>& histogram) {
    index_type  n = 0;
    index_type nonzeros = 0;
    index_type c = static_cast<index_type>(histogram.size());
    for (size_type i = 0; i < histogram.size(); ++i) {
      ASSERT(histogram[i] >= 0); 
      n += histogram[i];
      if (histogram[i] > 0) {
	++nonzeros;
      }
    }
    ASSERT(n > 0);
    ASSERT(nonzeros <= c);
    // probability of this and less likely scenarios:
    //    if (c > static_cast<index_type>(birthdays.size())) {
    // REMARK << "Cannot compute exact stem bias P value of cluster because its number of stem-invariants " 
    // << c << " is larger than " << birthdays.size() << endl;
    // }
    double p = birthdays.exactP(c, n, nonzeros); // computeStemInvariantP_new(c, n, nonzeros);
    // p += exp(logMultinomial(histogram, defProbVec, n));
    POSTCOND(isDefined(p));
    if (p > 1.0) {
      cout << "# Strange P-value found: " << p << endl;
      p = 1.0;
    }
    if (p < 0.0) {
      cout << "# Strange P-value found: " << p << endl;
      p = 0.0;
    }
    POSTCOND((p >= 0) && (p <= 1.0));
    return p;
  }

  /** Computes a probability for the observed distribution of stems, given the hypothesis
   * that the base pairs are random. With k = numCats, n = numTrials, m = nonzeros the code evaluates
   *   log(p) = [n*log(m) + logNOverK(k,m) + logFactorial(m)]
   *          - [n*log(n) + logNOverK(k,n) + logFactorial(n)]
   * and returns exp(log(p)), capped at 1.0.
   * @param numCats number of categories (k)
   * @param numTrials number of trials (n); asserted to be <= numCats
   * @param nonzeros m; must be in [1, numCats]. NOTE(review): the name suggests the number of
   *        categories that occurred at least once, while the previous comment said "never occurred" - confirm.
   * @result probability in [0, 1]; a warning is printed if the uncapped value exceeds 1 while nonzeros < numTrials
   */
  double computeStemInvariantP_new(index_type numCats, index_type numTrials, index_type nonzeros) {
    ASSERT(numCats > 0);
    ASSERT(numTrials > 0);
    ASSERT(nonzeros > 0);
    ASSERT(nonzeros <= numCats);
    ASSERT(numTrials <= numCats); // careful: might not always be the case
    const double logTop = numTrials * log(static_cast<double>(nonzeros))
      + logNOverK(numCats, nonzeros) + logFactorial(nonzeros);
    const double logBottom = numTrials * log(static_cast<double>(numTrials))
      + logNOverK(numCats, numTrials) + logFactorial(numTrials);
    double prob = exp(logTop - logBottom);
    if (prob > 1.0) {
      if (nonzeros < numTrials) {
        cout << "# WARNING: weird stem bias probability: " << prob << " " << numCats << " " << numTrials << " " << nonzeros << endl;
      }
      prob = 1.0;
    }
    POSTCOND(isDefined(prob));
    POSTCOND(prob >= 0.0);
    POSTCOND(prob <= 1.0);
    return prob;
  }


  /** Computes one term of the binomial-style estimate for the distribution of stems, given the
   * hypothesis that the base pairs are random. With k = numCats, n = numTrials, m = nonzeros:
   *   log(prob) = logNOverK(k, n) + n * (log(m) - log(k))
   * The result exp(log(prob)) is capped at 1.0.
   * @param numCats number of categories (k)
   * @param numTrials number of trials (n); asserted to be <= numCats
   * @param nonzeros m; must be in [1, numCats]. Callers in this class pass a count of non-empty histogram bins.
   * @result probability in [0, 1]; a warning is printed if the uncapped value exceeds 1 while nonzeros < numTrials
   */
  double computeStemInvariantProbBinom(index_type numCats, index_type numTrials, index_type nonzeros) {
    ASSERT(numCats > 0);
    ASSERT(numTrials > 0);
    ASSERT(nonzeros > 0);
    ASSERT(nonzeros <= numCats);
    ASSERT(numTrials <= numCats); // careful: might not always be the case
    const double logFraction = log(nonzeros) - log(numCats);
    const double logprob = logNOverK(numCats, numTrials) + numTrials * logFraction;
    double prob = exp(logprob);
    if (prob > 1.0) {
      if (nonzeros < numTrials) {
        cout << "# WARNING: weird stem bias probability: " << prob << " " << numCats << " " << numTrials << " " << nonzeros << endl;
      }
      prob = 1.0;
    }
    POSTCOND(isDefined(prob));
    POSTCOND(prob >= 0.0);
    POSTCOND(prob <= 1.0);
    return prob;
  }


  /** Computes a p-value for the observed distribution of stems, given the hypothesis that the
   * base pairs are random, by summing the probabilities of the observed and all sparser scenarios:
   *   p = sum over i = nonzeros, nonzeros-1, ..., 1 of computeStemInvariantProbBinom(numCats, numTrials, i)
   * This is the same summation that computeSparseHistogramBiasP_old performs.
   *
   * @param numCats number of categories (k)
   * @param numTrials number of trials (n)
   * @param nonzeros observed count m; the sum runs over i = m down to 1
   * @result sum of the per-scenario probabilities (not capped, so it may exceed 1)
   */
  double computeStemInvariantPBinom(index_type numCats, index_type numTrials, index_type nonzeros) {
    double p = 0;
    for (index_type i = nonzeros; i > 0; --i) {
      // use the loop variable: passing nonzeros here summed the same term nonzeros times
      p += computeStemInvariantProbBinom(numCats, numTrials, i);
    }
    return p;
  }

  /** Computes a p-value for the observed distribution of stems, given the hypothesis
   * that the base pairs are random.
   * Currently a plain forwarder to computeStemInvariantP_new; see that method for the formula
   * and the preconditions on the arguments. The commented-out line shows the alternative
   * implementation (computeStemInvariantPBinom).
   * @param numCats number of categories
   * @param numTrials number of trials
   * @param nonzeros passed through unchanged to computeStemInvariantP_new
   * @result the value returned by computeStemInvariantP_new
   */
  double computeStemInvariantP(index_type numCats, index_type numTrials, index_type nonzeros) {
    return computeStemInvariantP_new(numCats, numTrials, nonzeros);
    // return computeStemInvariantPBinom(numCats, numTrials, nonzeros);
  }

  /** Computes a p-value for how likely it is to see such a distribution of stems within ONE
   * cluster, given the hypothesis that the base pairs are random.
   * If any stem is longer than 1, all stems are first expanded with expandStems(stems, 1, reverseMode)
   * and the method is applied to the expanded set. Otherwise the stem-invariant histogram is built
   * and evaluated with computeSparseHistogramBiasP.
   * NOTE(review): the recursion terminates only if expandStems(..., 1, ...) yields stems of
   * length <= 1 - confirm against StemTools::expandStem.
   * @param stems stems of one cluster
   * @param clusterCutoff cluster cutoff; truncated to index_type and used as histogram padding
   * @param reverseMode passed through to expandStems and the histogram computation */
  double computeStemInvariantBiasP(const Vec<Stem>& stems, double clusterCutoff, bool reverseMode) {
    for (size_type i = 0; i < stems.size(); ++i) {
      if (stems[i].getLength() > 1) {
        // apply method to case with all stems having length 1 (single base pairs)
        return computeStemInvariantBiasP(expandStems(stems, 1, reverseMode), clusterCutoff, reverseMode);
      }
    }
    // Reaching this point means no stem is longer than 1; the former second scan for longer
    // stems could never trigger and has been removed.
    Vec<index_type> histogram = computeStemInvariantHistogram(stems, static_cast<index_type>(clusterCutoff), reverseMode);
    return computeSparseHistogramBiasP(histogram);
  }

  bool stemInvariantsOk(const Vec<Stem>& stems, index_type stemInvariantTypeCountMin, index_type stemInvariantCountMin,
			bool reverseMode) const {
    map<index_type, index_type> invariants;
    for (size_type i = 0; i < stems.size(); ++i) {
      index_type invar = stems[i].getInvariant(reverseMode);
      map<index_type, index_type>::const_iterator fit = invariants.find(invar);
      if (fit == invariants.end()) {
	invariants[invar] = stems[i].getLength();
      } else {
	index_type sofar = fit->second;
	invariants[invar] = sofar + stems[i].getLength();
      }
    }
    index_type countGood = 0;
    for (map<index_type, index_type>::const_iterator it = invariants.begin(); it != invariants.end(); it++) {
      if (it->second >= stemInvariantCountMin) {
        ++countGood;
      } 
    } 
    return countGood >= stemInvariantTypeCountMin;
  }

  bool stemInvariantsOkTest() const {
    index_type stemInvariantTypeCountMin = 2;
    index_type stemInvariantCountMin = 2;
    Vec<Stem> stems;
    ASSERT(!stemInvariantsOk(stems, stemInvariantTypeCountMin, stemInvariantCountMin, true));
    stems.push_back(Stem(3,18,2));
    ASSERT(!stemInvariantsOk(stems, 2,2, true));
    ASSERT(stemInvariantsOk(stems, 1, 2, true));
    stems.push_back(Stem(35,18,1));
    ASSERT(!stemInvariantsOk(stems, stemInvariantTypeCountMin, stemInvariantCountMin, true));
    stems.push_back(Stem(35,28,2)); // second stem of length 2. Should be ok now:
    ASSERT(stemInvariantsOk(stems, stemInvariantTypeCountMin, stemInvariantCountMin, true));
    return true;
  }

  /** Analyzes and clusters the found stems.
   * Only declared in this header; the definition lives elsewhere, so the exact role of each
   * parameter has to be taken from there.
   * Takes two output streams (os, bos) and a mutable reference to the stems.
   * Note that densities is passed by value, i.e. the map is copied on every call.
   * NOTE(review): multiTestMode presumably takes one of NO_CLUSTERING / MTEST_* - confirm.
   * This overload has an additional stemBiasPMode flag compared to analyzeClustersFast. */
  void analyzeClusters(ostream& os, ostream& bos,
		       Vec<Stem>& stems,
		       double clusterCutoff,
		       double totalLength1,
		       double totalLength2,
		       bool sameStrandMode,
		       double stemDensity,
		       double eMax,
		       double stemPMax, 
		       index_type stemLengthMin,
		       size_type clusterColMin,
		       int expandClusterMaxAllowed, 
		       bool complementMode,
		       bool reverseMode,
		       const MAFAlignment& maf,
		       const MAFAlignment& maf2,
		       const count_hash_type& matchCounts,
		       const count_hash_type& assemblyCombLength1,
		       const count_hash_type& assemblyCombLength2,
		       bool useEnergiesAsDensities,
		       int multiTestMode,
		       double_hash_type densities,
		       bool stemBiasPMode);

  /** Analyzes and clusters the found stems ("fast" variant).
   * Only declared in this header; the definition lives elsewhere, so the exact role of each
   * parameter has to be taken from there.
   * Same parameter list as analyzeClusters, except that there is no stemBiasPMode flag.
   * Note that densities is passed by value, i.e. the map is copied on every call. */
  void analyzeClustersFast(ostream& os, ostream& bos,
		       Vec<Stem>& stems,
		       double clusterCutoff,
		       double totalLength1,
		       double totalLength2,
		       bool sameStrandMode,
		       double stemDensity,
		       double eMax,
		       double stemPMax, 
		       index_type stemLengthMin,
		       size_type clusterColMin,
		       int expandClusterMaxAllowed, 
		       bool complementMode,
		       bool reverseMode,
		       const MAFAlignment& maf,
		       const MAFAlignment& maf2,
		       const count_hash_type& matchCounts,
		       const count_hash_type& assemblyCombLength1,
		       const count_hash_type& assemblyCombLength2,
		       bool useEnergiesAsDensities,
		       int multiTestMode,
		       double_hash_type densities);

  /** Returns the densities (string key -> density) for the given assembly combination hashes.
   * Variant taking a single length hash. Only declared here; the definition lives elsewhere. */
  static double_hash_type generateDensities(const count_hash_type& matchCounts,
					    const count_hash_type& assemblyCombLength1);


  /** Returns the densities (string key -> density) for the given assembly combination hashes.
   * Variant taking two length hashes. Only declared here; the definition lives elsewhere. */
  static double_hash_type generateDensities(const count_hash_type& matchCounts,
					    const count_hash_type& assemblyCombLength1,
					    const count_hash_type& assemblyCombLength2);


  /** Sets the score of every correlation in results to the highest density found for it.
   * If maf2 is non-empty, the two-alignment overload of findHighestDensity is used, otherwise
   * the single-alignment overload. The lookup is done with verbose level 0.
   * The resulting score may be negative (see findDensity); such values are stored unchanged.
   * @param results correlations to update in place
   * @param densities density table used for the lookup
   * @param internal passed through as the internalMode argument of findHighestDensity */
  static void augmentCorrelationDensitities(result_container& results,
                                            const double_hash_type& densities,
                                            const MAFAlignment& maf,
                                            const MAFAlignment& maf2, bool internal) {
    const bool twoAlignments = (maf2.size() > 0); // does not change inside the loop
    for (result_container::size_type i = 0; i < results.size(); ++i) {
      if (twoAlignments) {
        results[i].setScore(findHighestDensity(results[i], densities, maf, maf2, internal, 0));
      } else {
        results[i].setScore(findHighestDensity(results[i], densities, maf, internal, 0));
      }
    }
  }

  /** Reads a density table in the format produced by writeDensities: first the number of
   * entries, then that many whitespace-separated (key, density) pairs.
   * The stream state is checked directly after each read, so a missing header or a truncated
   * file is reported through ERROR_IF instead of reaching stod with an empty string.
   * A density token that is not a valid number still makes stod throw.
   * If a key occurs more than once, the first occurrence is kept.
   * @param is input stream positioned at the entry count
   * @return map from key to density */
  static double_hash_type readDensities(istream& is) {
    size_t n = 0;
    is >> n;
    ERROR_IF(!is, "Error reading density input file!");
    double_hash_type result;
    for (size_t i = 0; i < n; ++i) {
      string hash;
      string density;
      is >> hash >> density;
      // check after the read: eof alone (last token without trailing newline) is not a failure
      ERROR_IF(!is, "Error reading density input file!");
      pair<string, double> p(hash, stod(density));
      result.insert(p);
    }
    return result;
  }

  /** Writes a density table in the format read by readDensities: the number of entries on the
   * first line, followed by one "key<TAB>density" line per entry.
   * @param os output stream
   * @param densities table to write
   * @return always an empty map (the written table is not returned) */
  static double_hash_type writeDensities(ostream& os, const double_hash_type& densities ) {
    os << densities.size() << endl;
    double_hash_type::const_iterator entry = densities.begin();
    while (entry != densities.end()) {
      os << entry->first << "\t" << entry->second << endl;
      ++entry;
    }
    return double_hash_type();
  }

private:



  /** Finds the density for a single position.
   * If internalMode is true: assume internal coordinates, otherwise coordinates are assumed to be 0-based assembly coordinates and not internal coordinates.
   * NOTE(review): a caller comment in this class indicates the result can be negative - confirm the meaning in the definition.
   */
  static double findDensity(length_type pos,
			    const double_hash_type& densitities,
			    const MAFAlignment& maf,
			    bool internalMode, int verboseLevel); 
  
  /** Finds the highest density within an interval. Coordinates are assumed to be 0-based assembly coordinates and not internal coordinates! */
  static double findHighestDensity(const IntervallInt& interval,
				   const double_hash_type& densitities,
				   const MAFAlignment& maf,
				   bool internalMode, int verboseLevel);

  /** Finds the highest density for a correlation (single alignment). Coordinates are assumed to be 0-based assembly coordinates and not internal coordinates! */
  static double findHighestDensity(const Correlation& correlation,
				   const double_hash_type& densitities,
				   const MAFAlignment& maf,
				   bool internalMode, int verboseLevel);

  /** Finds the highest density for a correlation (two alignments). Coordinates are assumed to be 0-based assembly coordinates and not internal coordinates! */
  static double findHighestDensity(const Correlation& correlation,
				   const double_hash_type& densitities,
				   const MAFAlignment& maf,
				   const MAFAlignment& maf2,
				   bool internalMode, int verboseLevel);
  
  /** Finds the highest density in a cluster area (single alignment). */
  static double findHighestDensity(const dimension_type& clusterDimensions,
				   const double_hash_type& densitities,
				   const MAFAlignment& maf,
				   bool internalMode, int verboseLevel);

  /** Finds the highest density in a cluster area (single alignment).
   * NOTE(review): alternative to findHighestDensity with the same signature; how it differs is only visible in the definition. */
  static double findHighestDensity2(const dimension_type& clusterDimensions,
				   const double_hash_type& densitities,
				   const MAFAlignment& maf,
				   bool internalMode, int verboseLevel);
  
  /** Finds the highest density in a cluster area (two alignments). */
  static double findHighestDensity(const dimension_type& clusterDimensions,
				   const double_hash_type& densitities,
				   const MAFAlignment& maf,
				   const MAFAlignment& maf2,
                                   bool internalMode, int verboseLevel);

  /** Finds the highest density in a cluster area (two alignments).
   * NOTE(review): alternative to findHighestDensity with the same signature; how it differs is only visible in the definition. */
  static double findHighestDensity2(const dimension_type& clusterDimensions,
				   const double_hash_type& densitities,
				   const MAFAlignment& maf,
				   const MAFAlignment& maf2,
                                   bool internalMode, int verboseLevel);

};

#endif
