#include <HashCorrelationFinder3.h>
#include <NucleotideTools.h>
#include <cctype>
#include <ContainerTools.h>
#include <iomanip>
#include <iterator>
#include <queue>
#include <RankedSolution3.h>
#include <RankedSolution5.h>
#include <CorrelationTools.h>
#include <MAFAlignmentTools.h>
#include <StringTools.h> // for function findPositions

/** Keeps those entries of middleSet that are supported by a diagonal neighbor.
 * In reverse mode (RNA helix like interactions) an entry j is kept if j+1 is contained in leftSet
 * or j-1 is contained in rightSet. In forward mode the offsets are swapped: j-1 in leftSet or j+1 in rightSet.
 * Minimum stem lengths 2 and 3 are treated identically here; any other value of stemLengthMin
 * triggers ERROR for each visited entry.
 */
set<HashCorrelationFinder3::length_type>
HashCorrelationFinder3::filterNeighborCorrelations(const set<length_type>& leftSet, const set<length_type>& middleSet, 
						   const set<length_type>& rightSet) const {
  // offset of the partner expected in the left neighbor set; the right neighbor set uses the opposite offset
  const int leftShift = reverseMode ? 1 : -1;
  const int rightShift = -leftShift;
  set<length_type> kept;
  set<length_type>::const_iterator cur = middleSet.begin();
  while (cur != middleSet.end()) {
    if ((stemLengthMin == 2) || (stemLengthMin == 3)) {
      const bool supported = (leftSet.find((*cur) + leftShift) != leftSet.end())
        || (rightSet.find((*cur) + rightShift) != rightSet.end());
      if (supported) {
        kept.insert(*cur);
      }
    } else {
      ERROR("Stem length filtering can be between 2 and 3!");
    }
    cur++;
  }
  return kept;
}

/** Returns true, if columns i,j were found to be correlated. Only works for i < j.
 * Column ids i outside of the stored result bins and columns without a result bin yield false.
 */
bool
HashCorrelationFinder3::isCorrelationFound(length_type i, length_type j) const {
  ASSERT(i < j);
  if (i < 0) {
    return false; // can happen when callers probe neighbors like i-1 or i-2
  }
  if (i >= static_cast<length_type>(resultBins->size())) {
    return false;
  }
  if ((*resultBins)[i] == 0) {
    return false; // nothing stored for this column
  }
  // linear search: does not rely on the bin being sorted
  return find((*resultBins)[i]->begin(), (*resultBins)[i]->end(), j) != (*resultBins)[i]->end();
}

/** Returns true if correlation i,j is not part of a stem of at least length 2.
 * The correlation must have been found and i < j must hold. In reverse mode the anti-diagonal
 * neighbors (i-1,j+1) and (i+1,j-1) are checked, otherwise the diagonal neighbors (i-1,j-1) and (i+1,j+1).
 */
bool
HashCorrelationFinder3::isCorrelationIsolated2(length_type i, length_type j) const {
  ASSERT(i < j);
  ASSERT(isCorrelationFound(i, j));
  if (!reverseMode) {
    return (!isCorrelationFound(i-1,j-1)) && (!isCorrelationFound(i+1,j+1));
  }
  if ((j - i) < 3) {
    return true; // too close interaction
  }
  return (!isCorrelationFound(i-1,j+1)) && (!isCorrelationFound(i+1,j-1));
}

/** Returns true if correlation i,j is not part of a stem of at least length 3.
 * The correlation must have been found and i < j must hold. A stem of length 3 exists if
 * i,j is the middle element (both direct neighbors found) or an end element (a direct neighbor
 * and the next one in the same direction found). Neighbors lie on the anti-diagonal in reverse mode
 * and on the diagonal otherwise.
 */
bool
HashCorrelationFinder3::isCorrelationIsolated3(length_type i, length_type j) const {
  ASSERT(i < j);
  ASSERT(isCorrelationFound(i, j));
  if (reverseMode) {
    const length_type dist = j - i;
    if (dist < 3) {
      return true; // too close interaction
    }
    // outer neighbor present: need the inner neighbor or the second outer neighbor
    if (isCorrelationFound(i-1,j+1) && (isCorrelationFound(i+1,j-1) || isCorrelationFound(i-2,j+2))) {
      return false;
    }
    // inner neighbor present: need the outer neighbor or the second inner neighbor;
    // (i+2,j-2) is only a valid pair (i+2 < j-2) if the distance is greater than 4
    if (isCorrelationFound(i+1,j-1) && (isCorrelationFound(i-1,j+1) || ((dist > 4) && isCorrelationFound(i+2,j-2)))) {
      return false;
    }
    return true;
  }
  if (isCorrelationFound(i-1,j-1) && (isCorrelationFound(i+1,j+1) || isCorrelationFound(i-2,j-2))) {
    return false;
  }
  if (isCorrelationFound(i+1,j+1) && (isCorrelationFound(i+2,j+2) || isCorrelationFound(i-1,j-1))) {
    return false;
  }
  return true;
}

/** Filters the resultBins data structure for one column: correlations of column i that are
 * isolated according to isCorrelationIsolated3 are removed. Works only if columns i-1 and i+1
 * are defined already. The bin of column i is deleted and replaced: NOT THREAD SAFE.
 */
void
HashCorrelationFinder3::filterIsolatedCorrelation3(length_type i) const {
  PRECOND((i > 0) && (maf != 0) && (resultBins != 0) && ((i + 1) < maf->getTotalLength()) );
  if ((*resultBins)[i] == 0) {
    return; // nothing to do
  }
  const bool noNeighborBins = ((*resultBins)[i+1] == 0) && ((*resultBins)[i-1] == 0);
  if (noNeighborBins) {
    // without results in both neighboring columns the whole bin is discarded
    delete( (*resultBins)[i] ); // not thread-safe
    (*resultBins)[i] = 0;
    return;
  }
  result_vector_type * survivors = new result_vector_type();
  result_vector_type::const_iterator pos = (*resultBins)[i]->begin();
  for (; pos != (*resultBins)[i]->end(); ++pos) {
    const length_type partner = *pos;
    ASSERT(isCorrelationFound(i, partner));
    if (isCorrelationIsolated3(i, partner)) {
      continue; // isolated correlations are not copied
    }
    survivors -> push_back(partner);
  }
  delete( (*resultBins)[i] ); // not thread-safe
  (*resultBins)[i] = survivors;
}

/** Resets the cluster filter, pushes all stored correlations (column by column, in the order
 * in which they are stored in resultBins) through it and returns all clusters emitted by the filter.
 */
HashCorrelationFinder3::cluster_result_type
HashCorrelationFinder3::getResultClusters() const {
  clusterFilter.reset();
  cluster_result_type collected;
  for (size_type col = 0; col < resultBins->size(); ++col) {
    if ((*resultBins)[col] == 0) {
      continue; // no correlations stored for this column
    }
    result_vector_type::const_iterator partnerIt = (*resultBins)[col]->begin();
    while (partnerIt != (*resultBins)[col]->end()) {
      length_type partner = *partnerIt;
      cluster_result_type emitted = clusterFilter.push(col, partner); // at most 2 copies of each correlation are expected
      for (size_type n = 0; n < emitted.size(); ++n) {
        collected.push_back(emitted[n]);
      }
      ++partnerIt;
    }
  }
  return collected;
}

/** Collects and returns results; NOT thread-safe (the stored result bins are sorted in place).
 * For each column the unique partner columns are determined, optionally filtered for isolated
 * correlations (removeIsolated with stemLengthMin 2 or 3) and pushed through an activated copy of the
 * cluster filter. The correlations of all clusters emitted by that filter are returned.
 */
HashCorrelationFinder3::result_container
HashCorrelationFinder3::getResults() const {
  PRECOND((resultBins != 0) && (static_cast<length_type>(resultBins->size()) == maf->getTotalLength()));
  // first pass: sort every bin; the duplicate removal below compares neighboring elements
  for (size_type col = 0; col < resultBins->size(); ++col) {
    if ((*resultBins)[col] == 0) {
      continue;
    }
    sort((*resultBins)[col]->begin(), (*resultBins)[col]->end());
  }
  Vec<Correlation> clustered;
  SingleLinkage2DProgressiveFilter localFilter = clusterFilter; // work on a copy of the member filter
  localFilter.reset();
  localFilter.setActive(true);
  size_t pushedCount = 0;
  // second pass: push unique, non-isolated correlations through the cluster filter
  for (size_type col = 0; col < resultBins->size(); ++col) {
    if ((*resultBins)[col] == 0) {
      continue;
    }
    Vec<length_type> partners;
    partners.reserve((*resultBins)[col]->size());
    for (result_vector_type::const_iterator cur = (*resultBins)[col]->begin(); cur != (*resultBins)[col]->end(); ++cur) {
      const bool isFirst = (cur == (*resultBins)[col]->begin());
      if (isFirst || ( *(cur-1) != *cur)) { // only copy unique values
        partners.push_back(*cur);
      }
    }
    sort(partners.begin(), partners.end());
    for (size_type idx = 0; idx < partners.size(); ++idx) {
      length_type partner = partners[idx];
      bool accepted = !removeIsolated;
      if ((!accepted) && (stemLengthMin==3)) {
        accepted = !isCorrelationIsolated3(col, partner);
      }
      if ((!accepted) && (stemLengthMin==2)) {
        accepted = !isCorrelationIsolated2(col, partner);
      }
      if (!accepted) {
        continue;
      }
      cluster_result_type emitted = localFilter.push(col, partner); // at most 2 copies of each correlation are expected
      ++pushedCount;
      for (size_t c = 0; c < emitted.size(); ++c) {
        for (size_t m = 0; m < emitted[c].size(); ++m) {
          clustered.push_back(Correlation(emitted[c][m].first, emitted[c][m].second));
        }
      }
    }
  }
  REMARK << "Number of unfiltered alignment column pairs: " << pushedCount << ". Count after applying cluster filter:" << clustered.size() << endl;
  return clustered;
}

/** Reranking of the priority queue of hash tables to be searched.
 * For each queue element i in [firstId, lastId) the up to "previous" preceding elements are scanned
 * backwards. The first preceding element that shares a sequence id (third or fourth member) with
 * element i gets its score (first member, i.e. the set size) multiplied with scorePenalty.
 * NOTE(review): the original description stated that the hash table itself is penalized if one of its
 * n predecessors uses the same assembly; the code penalizes the predecessor instead - confirm intent.
 */ 
void
HashCorrelationFinder3::rerankQueue(queue_type& queue, size_t firstId, size_t lastId, int previous, double scorePenalty) const {
  PRECOND(previous > 0);
  PRECOND(scorePenalty > 1.0);
  const int stop = static_cast<int>(lastId);
  for (int cur = static_cast<int>(firstId); cur < stop; ++cur) {
    const size_t curIdA = queue[cur].getThird();
    const size_t curIdB = queue[cur].getFourth();
    // lowest index that is still inspected; never negative
    const int oldest = ((cur - previous) < 0) ? 0 : (cur - previous);
    int prev = cur - 1;
    while (prev >= oldest) {
      const size_t prevIdA = queue[prev].getThird();
      const size_t prevIdB = queue[prev].getFourth();
      const bool sharesSequence = (prevIdA == curIdA) || (prevIdA == curIdB)
        || (prevIdB == curIdA) || (prevIdB == curIdB);
      if (sharesSequence) {
        queue[prev].setFirst(queue[prev].getFirst() * scorePenalty);
        break; // only the closest matching predecessor is modified
      }
      --prev;
    }
  }
}

/** Returns column ids of MAF columns that are compatible with the given column-assembly search.
 * For each pair of non-reference sequences j < k of the column a hash is created from the assemblies
 * and characters of the reference sequence and sequences j and k (gaps and fully conserved triples
 * are skipped). The position sets found for these hashes are sorted (smallest first according to the
 * original comment) and intersected consecutively. The remaining candidate columns are filtered by
 * minimum distance (corrDistMin), by a complementarity test (complementMode) or a match test, and
 * by the optional neighbor (checkNeighborMode) and anti-neighbor (checkAntiNeighborMode) tests.
 * If searchColumnSplit > 0, only candidates from the other side of the split are considered:
 * candidates >= searchColumnSplit for colid < searchColumnSplit and candidates < searchColumnSplit otherwise.
 * Parameter column: characters of the searched alignment column; colAssemblies: assembly names of the
 * sequences (same size as column); colid: zero-based id of the searched column; queue: preallocated
 * work space, its leading elements are overwritten and sorted.
 * An empty set is returned if the column is not searchable (less than 3 sequences, no reference
 * sequence, gap in reference sequence, no hash sets found or too many candidate columns).
 * Warning: the result can contain values that are not complementary. */
set<HashCorrelationFinder3::length_type> 
HashCorrelationFinder3::searchMafColumn(const string& column, const Vec<string>& colAssemblies,
					length_type colid, queue_type& queue) const {
  PRECOND(column.size() == colAssemblies.size());
  string refAssembly = maf->getRefAssembly();
  PRECOND(refAssembly.size() > 0);
  const bool CONVERT_TO_COMPLEMENT = true; // this is a constant used both ways to increase readability
  size_type pc = 0; // number of queue elements filled for this column
  Vec<length_type> result, tmpResult;
  set<length_type> finalResult;
  if (column.size() < 3) {
    return finalResult;
  }
  Vec<string>::const_iterator refIt = find(colAssemblies.begin(), colAssemblies.end(), refAssembly); // id of reference assembly
  if (refIt == colAssemblies.end()) {
    return finalResult; // no sequence with reference assembly in this alignment
  }
  string::size_type refId = distance(colAssemblies.begin(), refIt); // convert iterator to index
  ASSERT(colAssemblies[refId] == refAssembly); 
  char c1 = toupper(column[refId]);
  if (NucleotideTools::isGap(c1)) {
    return finalResult;
  }
  ASSERT(c1 != 'U'); // only DNA allowed now
  if (verbose > 3) {
    REMARK << "Searching column " << (colid + 1) << ": "
           << column << endl;
  }
  // collect the position sets of all usable (reference, j, k) triples in the queue
  for (string::size_type j = 0; j < column.size(); ++j) {
    if (j == refId) {
      continue;
    }
    char c2 = toupper(column[j]);
    if (NucleotideTools::isGap(c2)) {
      continue;
    }
    ASSERT(c2 != 'U'); // only DNA allowed now
    for (string::size_type k = (j+1); k < column.size(); ++k) {
      if (k == refId) {
        continue;
      }
      char c3 = toupper(column[k]);
      if (NucleotideTools::isGap(c3)) {
        continue;
      }
      ASSERT(c3 != 'U'); // only DNA allowed now
      if ((c1 == c2) && (c1 == c3)) { // ignore searches for conserved nucleotides; there are too many!
        continue;
      }
      ASSERT(colAssemblies[refId].size() > 0);
      ASSERT(colAssemblies[j].size() > 0);
      ASSERT(colAssemblies[k].size() > 0);
      ASSERT(!((c1 == c2) && (c1 == c3)));
      string hashhash = MAFSearchTables3::createHashTableHash(colAssemblies[refId], colAssemblies[j], colAssemblies[k], c1, c2, c3);
      map<string, compressed_type >::const_iterator hashIt = tables->findPositionHash(hashhash);
      if (hashIt != tables->positionHashes.end()) {
        map<string, set<length_type> >::size_type sz = hashIt->second.size();
        ASSERT(pc < queue.size());
        queue[pc].setFirst(static_cast<double>(sz));
        queue[pc].setSecond(hashhash);
        queue[pc].setThird(j);
        queue[pc].setFourth(k);
        if (verbose > 4) {
          REMARK << "Column " << (colid + 1) 
                 << " : Added column ids to search queue: " << sz << " " << hashhash << " " << (j+1) << " " << (k+1) << endl;
          if (verbose > 5) {
            const compressed_type& addedSet = tables->getSet(queue[pc].second);
            cout << addedSet << endl;
          }
        }
        pc++;
      }
    }
  }
  sort(queue.begin(), queue.begin()+pc); // "best" position set to use first is the smallest one
  // trick for reranking queue: did not seem to improve speed
  // int previous = 2;
  // double rerankPenalty = 3;
  // rerankQueue(queue, 0, pc, previous, rerankPenalty);
  // sort(queue.begin(), queue.begin()+pc);
  if (pc < 1) {
    if (verbose > 1) {
      REMARK << "Ignored column: " << (colid + 1) << " " << column << " Strange, no hash sets found." << endl;
    }
    // ++ignoredCount; // THREADISSUE ?
    return finalResult;
  }
  ASSERT(pc > 0);
  const compressed_type& bestSet = tables->getSet(queue[0].second);
  result.reserve(bestSet.size());
  if (pc > 1) {
    const compressed_type& secondBestSet = tables->getSet(queue[1].second);
    // ASSERT(bestSet.size() <= secondBestSet.size()); // whole point is to sort by set size. However, ASSERTION is not true precisely due to compression
    compressed_type::iterator start1Pt = bestSet.begin();
    compressed_type::iterator end1Pt = bestSet.end();
    compressed_type::iterator start2Pt = secondBestSet.begin();
    compressed_type::iterator end2Pt = secondBestSet.end();
    if (searchColumnSplit > 0) {
      if (colid < searchColumnSplit) { // interesting results are in this case >= searchColumnSplit
        start1Pt = lower_bound(bestSet.begin(), bestSet.end(), searchColumnSplit); // first element >= split (or end)
        start2Pt = lower_bound(secondBestSet.begin(), secondBestSet.end(), searchColumnSplit);
      } else { // interesting results are in this case < searchColumnSplit
        // lower_bound (not upper_bound): the column equal to searchColumnSplit must be excluded,
        // consistent with the single-set case below
        end1Pt = lower_bound(bestSet.begin(), bestSet.end(), searchColumnSplit);
        end2Pt = lower_bound(secondBestSet.begin(), secondBestSet.end(), searchColumnSplit);
      }
    }
    set_intersection(start1Pt, end1Pt, start2Pt, end2Pt, back_inserter(result));
  } else {
    if (bestSet.size() <= searchColumnMax) {
      for (compressed_type::const_iterator it = bestSet.begin(); it != bestSet.end(); it++ ) {   
        if ((searchColumnSplit <= 0)
            || ((colid < searchColumnSplit) && (*it >= searchColumnSplit))
            || ((colid >= searchColumnSplit) && (*it < searchColumnSplit)) ) {
          result.push_back(*it);
        } 
      }
    } else {
      if (verbose > 1) {
        REMARK << "Ignored column: " << (colid + 1) << " " << column << " Too many possible columns to be searched in only one set ( " << bestSet.size() << " )" << endl; 
      }
      // ++ignoredCount;
      return finalResult;
    }
  }
  // central loop: consecutive intersections!
  if (result.size() > 0) {
    tmpResult.reserve(result.size());
    for (Vec<RankedSolution3<string> >::size_type i = 2; i < pc; ++i) {
      tmpResult.clear();
      const compressed_type& nextBestSet = tables->getSet(queue[i].second); 
      Vec<length_type>::iterator start1Pt = result.begin();
      Vec<length_type>::iterator end1Pt = result.end();
      compressed_type::iterator start2Pt = nextBestSet.begin();
      compressed_type::iterator end2Pt = nextBestSet.end();
      if (searchColumnSplit > 0) {
        if (colid < searchColumnSplit) { // interesting results are in this case >= searchColumnSplit
          start1Pt = lower_bound(result.begin(), result.end(), searchColumnSplit); // first element >= split (or end)
          start2Pt = lower_bound(nextBestSet.begin(), nextBestSet.end(), searchColumnSplit);
        } else { // interesting results are in this case < searchColumnSplit
          // lower_bound (not upper_bound): exclude the column equal to searchColumnSplit
          end1Pt = lower_bound(result.begin(), result.end(), searchColumnSplit);
          end2Pt = lower_bound(nextBestSet.begin(), nextBestSet.end(), searchColumnSplit);
        }
      }
      set_intersection(start1Pt, end1Pt, start2Pt, end2Pt, back_inserter(tmpResult));
      result = tmpResult;
      if (result.size() == 0) {
        break;
      }
    }
  }
  if (result.size() <= searchColumnMax) {
    length_type lastInserted = -999;
    bool badStretchFlag = false;
    for (Vec<length_type>::const_iterator it = result.begin(); it != result.end(); it++) {
      ASSERT((colid >= 0) && (colid < maf->getTotalLength()));
      ASSERT((*it >= 0) && ((*it) < maf->getTotalLength()));
      if (corrDistMin > 0) { // skip candidates that are too close to the searched column
        if (colid >= *it) {
          if ((colid - *it) < corrDistMin) {
            continue;
          }
        } else {
          if ((*it - colid) < corrDistMin) {
            continue;
          }
        }
      }
      if (!adjacentMode) {
        // Compare with the neighboring candidates; the neighbors are only accessed if they exist
        // (the previous version decremented begin() and dereferenced end()).
        // NOTE(review): the test is for equal values although the intent was described as
        // "ignore stretches of adjacent matching columns" - confirm.
        bool equalsNeighbor = false;
        if (it != result.begin()) {
          Vec<length_type>::const_iterator before = it;
          --before;
          equalsNeighbor = (*it == *before);
        }
        if (!equalsNeighbor) {
          Vec<length_type>::const_iterator after = it;
          ++after;
          equalsNeighbor = (after != result.end()) && (*it == *after);
        }
        if (equalsNeighbor) {
          continue;
        }
      }
      ASSERT(!allowGap);
      if (complementMode) {
        if (MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid, *it, allowedGuFrac, allowedGapFrac, !CONVERT_TO_COMPLEMENT,
                                                               basepairTypeMin)) {
          bool okFlag = true;
          // if checkNeighborMode is true: for i,j to be complementary, either i+1,j-1 or i-1, j+1 have to be complementary also
          if (checkNeighborMode && (colid > 0) && ((colid+1) < maf->getTotalLength()) 
              && (*it > 0) && ( (*it + 1) < maf->getTotalLength())) {
            if ((!MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid+1, (*it)-1, allowedGuFrac, allowedGapFrac, !CONVERT_TO_COMPLEMENT, 1))
                // 12/2011:
                && (!MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid-1, (*it)+1, allowedGuFrac, allowedGapFrac,
                                                                        !CONVERT_TO_COMPLEMENT, 1))) { 
              if (verbose > 5) {
                REMARK << "Ignoring column pair " << (colid+1) << " " << (*it +1) << " because its neigbors are not complementary!" << endl;
              }
              okFlag = false;
            }
            if (okFlag) {
              switch (checkAntiNeighborMode) {
              case 0: break; // do nothing
              case 1: // do not allow both of two "wrong" diagonals to be complementary
                if ((MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid+1, (*it)+1, allowedGuFrac, allowedGapFrac, !CONVERT_TO_COMPLEMENT, 1))
                    && (MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid-1, (*it)-1, allowedGuFrac, allowedGapFrac, !CONVERT_TO_COMPLEMENT, 1))) {
                  okFlag = false;
                }
                break;
              case 2: // do not allow any of two "wrong" diagonals to be complementary
                if ((MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid+1, (*it)+1, allowedGuFrac, allowedGapFrac, !CONVERT_TO_COMPLEMENT, 1))
                    || (MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid-1, (*it)-1, allowedGuFrac, allowedGapFrac, !CONVERT_TO_COMPLEMENT, 1))) {
                  okFlag = false;
                }
                break;
              default: ERROR("Invalid anti-neighdbor mode. Must be between 0 and 2.");
              }
            }
          }
          if (okFlag) {
            if (adjacentMode) {
              finalResult.insert(*it); // ok to insert Correlation(highIndex, lowIndex) as well as Correlation(lowIndex, highIndex)
              lastInserted = *it;
            } 
            else {
              if (*it == (lastInserted + 1)) {
                // candidate directly follows the last stored one: part of a "bad" stretch
                ASSERT(finalResult.find(lastInserted) != finalResult.end());
                finalResult.erase(lastInserted);
                badStretchFlag = true;
                finalResult.insert(*it); // ok to insert Correlation(highIndex, lowIndex) as well as Correlation(lowIndex, highIndex)
                lastInserted = *it;
              } else {
                if (badStretchFlag) {
                  // NOTE(review): the current candidate is not stored here although it is not
                  // adjacent to the bad stretch - confirm that dropping it is intended.
                  finalResult.erase(lastInserted);
                  badStretchFlag = false; // do not store: end of bad stretch
                } else {
                  finalResult.insert(*it); // ok to insert Correlation(highIndex, lowIndex) as well as Correlation(lowIndex, highIndex)
                  lastInserted = *it;
                }
              }
            }
          }
        } else {
          if (verbose > 2) {
            REMARK << "A non-complementary column pair was initially found but later thrown out: "
                   << maf->getSlice(colid) << " " << (colid + 1) << endl;
            if (verbose > 3) {
              REMARK << "Allowed GU fraction: " <<  allowedGuFrac << " allowed gap fraction: " << allowedGapFrac << endl;
            }
            if (tables->getAssemblyPairFraction() == 1.0) {
              // this can still legally happen for example in the case of present gaps 
              // like chrY alignment pair columns: 263180 274356
              // -AAAATAAAAAAAN ATTTTATTTTTTTT anoCar1 bosTau3 canFam2 danRer4 equCab1 galGal3 gasAcu1 hg18 mm8 monDom4 ornAna1 panTro2 rheMac2 rn4
              MAFAlignmentTools::writeCommonColumnPair(cout, *maf, colid, *it); // FIXIT : missing # sign...
            }
          }
        }
      } else { // looking in this mode not for complementary but equivalent columns:
        ERROR_IF(reverseMode,
                 "When looking for matching alignment columns, reverse-mode has to be false.");  
        if (MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid, *it, allowedGuFrac, allowedGapFrac, CONVERT_TO_COMPLEMENT, basepairTypeMin)) {
          bool okFlag = true;
          // if checkNeighborMode is true: for i,j to be matching, either i+1,j+1 or i-1, j-1 have to be matching also
          if (checkNeighborMode && (colid > 0) && ((colid+1) < maf->getTotalLength()) 
              && (*it > 0) && ( (*it + 1) < maf->getTotalLength())) {
            if ((!MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid+1, (*it)+1, allowedGuFrac,allowedGapFrac, CONVERT_TO_COMPLEMENT, 1))
                && (!MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid-1, (*it)-1, allowedGuFrac, allowedGapFrac, CONVERT_TO_COMPLEMENT, 1))) {
              if (verbose > 5) {
                REMARK << "Ignoring column pair " << (colid+1) << " " << (*it +1) << " because its neigbors are not matching!" << endl;
              }
              okFlag = false;
            }
            if (okFlag) {
              switch (checkAntiNeighborMode) {
              case 0: break; // do nothing
              case 1: // do not allow both of two "wrong" diagonals to be complementary
                if ((MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid-1, (*it)+1, allowedGuFrac, allowedGapFrac, CONVERT_TO_COMPLEMENT, 1))
                    && (MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid+1, (*it)-1, allowedGuFrac, allowedGapFrac, CONVERT_TO_COMPLEMENT, 1))) {
                  okFlag = false;
                }
                break;
              case 2: // do not allow any of two "wrong" diagonals to be complementary
                if ((MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid-1, (*it)+1, allowedGuFrac, allowedGapFrac, CONVERT_TO_COMPLEMENT, 1))
                    || (MAFAlignmentTools::isColumnPairAlmostComplementary(*maf, colid+1, (*it)-1, allowedGuFrac, allowedGapFrac, CONVERT_TO_COMPLEMENT, 1))) {
                  okFlag = false;
                }
                break;
              default: ERROR("Invalid anti-neighdbor mode. Must be between 0 and 2.");
              }
            }

          }
          if (okFlag) {
            if (adjacentMode) {
              finalResult.insert(*it); // ok to insert Correlation(highIndex, lowIndex) as well as Correlation(lowIndex, highIndex)
              lastInserted = *it;
            } 
            else {
              ASSERT(false); // not sure about this mode yet; FIXIT
              if (*it == (lastInserted + 1)) {
                ASSERT(finalResult.find(lastInserted) != finalResult.end());
                finalResult.erase(lastInserted);
                badStretchFlag = true;
                finalResult.insert(*it); // ok to insert Correlation(highIndex, lowIndex) as well as Correlation(lowIndex, highIndex)
                lastInserted = *it;
              } else {
                if (badStretchFlag) {
                  finalResult.erase(lastInserted);
                  badStretchFlag = false; // do not store: end of bad stretch
                } else {
                  finalResult.insert(*it); // ok to insert Correlation(highIndex, lowIndex) as well as Correlation(lowIndex, highIndex)
                  lastInserted = *it;
                }
              }
            }
          }
        } else {
          if (verbose > 2) {
            REMARK << "A non-equivalent column pair was initially found but later thrown out: "
                   << maf->getSlice(colid) << " " << (colid + 1) << endl;
            if (verbose > 3) {
              REMARK << "Allowed GU fraction: " <<  allowedGuFrac << " allowed gap fraction: " << allowedGapFrac << endl;
            }
            if (tables->getAssemblyPairFraction() == 1.0) {
              // this can still legally happen for example in the case of present gaps 
              // like chrY alignment pair columns: 263180 274356
              // -AAAATAAAAAAAN ATTTTATTTTTTTT anoCar1 bosTau3 canFam2 danRer4 equCab1 galGal3 gasAcu1 hg18 mm8 monDom4 ornAna1 panTro2 rheMac2 rn4
              MAFAlignmentTools::writeCommonColumnPair(cout, *maf, colid, *it); // FIXIT : missing # sign...
            }
          }
        }
      }
      
    }
  } else {
    if (verbose > 1) {
      REMARK << "Ignored column: " << (colid + 1) << " " << column << " Too many possible columns to be searched ( " << result.size() << " )" << endl; 
    }
    localMatchPairCount += result.size(); // make conservative assumption that all ignored
    // matches would be valid matches. This will increase the mean density of matches 
    // and increase the Poisson based P/E value estimation (will become less significant).
    // ++ignoredCount;
  }
  return finalResult;
}


/** Compute search area for each assembly combination. Assumes that the MAF is NOT a concatenation
 * of two parts; instead the whole MAF is searched in an all-versus-all manner.
 * For each searchable block the area within the block (column pairs minus the diagonals excluded by
 * corrDistMin, minus a correction for columns rejected by isSearchColumnOK) is stored under the
 * assemblies hash of the block. For each searchable pair of different blocks the product of their
 * numbers of accepted columns is stored under the hash of their common assemblies.
 */
HashCorrelationFinder3::count_hash_type 
HashCorrelationFinder3::computeUnsplitSearchAreas() const {
  count_hash_type areas;
  // number of columns per block that are not usable for searching
  Vec<length_type> badColSizes(maf->size());
  for (MAFAlignment::size_type blk = 0; blk < maf->size(); ++blk) {
    const length_type blkLen = (*maf)[blk].getLength();
    size_t badCount = 0;
    for (length_type col = 0; col < blkLen; ++col) {
      string slice = (*maf)[blk].getColumn(col);
      if (!isSearchColumnOK(slice)) {
        ++badCount;
      }
    }
    badColSizes[blk] = badCount;
  }

  for (MAFAlignment::size_type i = 0; i < maf->size(); ++i) {
    if (!isBlockPairSearchable(i,i)) {
      continue;
    }
    string assemblies = maf->getAlignmentAssembliesHash(i);
    length_type len = (*maf)[i].getLength();
    // area within the block: all column pairs above the main diagonal ...
    length_type area = (len * (len-1)) / 2;
    // ... minus the diagonals that are closer than corrDistMin (loop is empty for corrDistMin <= 1)
    for (length_type k = 1; k < corrDistMin; ++k) {
      area -= (len-k);
    }
    // correction for bad columns; decreasing amounts avoid subtracting the same pairs several times
    for (length_type b = 0; b < badColSizes[i]; ++b) {
      length_type diff = (len - (2*corrDistMin) - b);
      if (verbose > 5) {
        REMARK << "Subtracting " << diff << " from search area of block " << (i+1) << " " << area << endl;
      }
      area -= diff;
      ERROR_IF(area < 0, "HashCorrelationFinder3: Internal error: negative area encountered.");
    }
    const bool firstForAssemblies = (areas.find(assemblies) == areas.end());
    if (verbose > 2) {
      if (firstForAssemblies) {
        REMARK << "Setting area " << area << " to assembly " << assemblies << " alignment: " << (i+1) << endl;
      } else {
        REMARK << "Adding area " << area << " to assembly " << assemblies << " alignment: " << (i+1) << endl;
      }
    }
    if (firstForAssemblies) {
      areas[assemblies] = area;
    } else {
      areas[assemblies] += area;
    }
    // areas between this block and all later blocks
    for (MAFAlignment::size_type j = i+1; j < maf->size(); ++j) {
      if (verbose > 5) {
        REMARK << "Checking MAF block combination for searchability: " << (i+1) << " " << (j+1) << endl;
      }
      if (!isBlockPairSearchable(i,j)) {
        continue;
      }
      length_type len2 = (*maf)[j].getLength();
      string commonAssemblies = maf->getCommonAssembliesHash(i,j);
      length_type pairArea = (len-badColSizes[i]) * (len2 - badColSizes[j]);
      const bool firstForCommon = (areas.find(commonAssemblies) == areas.end());
      if (firstForCommon) {
        areas[commonAssemblies] = pairArea;
      } else {
        areas[commonAssemblies] += pairArea;
      }
      if (verbose > 2) {
        if (firstForCommon) {
          REMARK << "Setting area " << pairArea << " to common-assembly-hash " << commonAssemblies << " alignments: " << (i+1) << " " << (j+1) <<  endl;
        } else {
          REMARK << "Adding area " << pairArea << " to common-assembly-hash " << commonAssemblies << " alignments: " << (i+1) << " " << (j+1) <<  endl;
        }
      }
    }
  }

  if (verbose > 4) {
    REMARK << "Finished HashCorrelationFinder3::computeUnsplitAreas" << endl;
  }
  return areas;
}

/** Computes the search area (number of candidate column pairs) for each combination of common assemblies.
 *  Assumes that the MAF is a concatenation of two parts; only pairs of alignment blocks that span
 *  the two parts contribute.
 *
 *  @param searchMax index of the first column of the second part (zero based internal coordinates).
 *         Must be smaller than the total alignment length (checked with ASSERT).
 *  @return hash that maps the common-assemblies hash of a block pair to the summed area of all searchable
 *          block pairs with that hash. The area of one block pair is the product of the numbers of columns
 *          of each block that pass isSearchColumnOK.
 */
HashCorrelationFinder3::count_hash_type 
HashCorrelationFinder3::computeSplitSearchAreas(length_type searchMax) const {
  ASSERT(searchMax < maf->getTotalLength());
  if (verbose > 2) {
    REMARK << "Starting HashCorrelationFinder3::computeSplitSearchAreas" << endl;
  }
  // blocks with an index below aliId form the first part, all remaining blocks the second part
  MAFAlignment::size_type aliId = static_cast<MAFAlignment::size_type>(maf->getAlignmentId(searchMax));
  count_hash_type result;
  if (verbose > 2) {
    REMARK << "Alignment blocks 1 to " << aliId << " belong to first part, alignment blocks " << (aliId+1)
	   << " to " << maf->size() << " belong to second part." << endl;
  }
  // for each block: number of columns that are rejected by isSearchColumnOK
  Vec<length_type> badColSizes(maf->size());
  for (MAFAlignment::size_type i = 0; i < maf->size(); ++i) {
    length_type len = (*maf)[i].getLength();
    length_type badCount = 0; // a plain counter is sufficient, the column indices themselves are never used
    for (length_type ii = 0; ii < len; ++ii) {
      string slice = (*maf)[i].getColumn(ii);
      if (!isSearchColumnOK(slice)) {
	++badCount;
      }
    }
    badColSizes[i] = badCount;
  }
  // every block of the first part is combined with every block of the second part
  for (MAFAlignment::size_type i = 0; i < aliId; ++i) {
    length_type len = (*maf)[i].getLength();
    for (MAFAlignment::size_type j = aliId; j < maf->size(); ++j) {
      if (!isBlockPairSearchable(i,j)) {
        continue;
      }
      string commonAssemblies = maf->getCommonAssembliesHash(i,j);
      length_type len2 = (*maf)[j].getLength();
      // only columns that passed the column filter can be part of a searched pair
      length_type area = (len - badColSizes[i]) * (len2 - badColSizes[j]);
      if (verbose > 2) {
	REMARK << "Adding area for assembly combination " << commonAssemblies << " " << (i+1) << " " << (j+1) << " " << area << endl;
      }
      count_hash_type::iterator found = result.find(commonAssemblies);
      if (found == result.end()) {
	result[commonAssemblies] = area;
      } else {
	found->second += area;
      }
    }
  }
  if (verbose > 2) {
    REMARK << "List of found areas for different assembly combinations: " << endl;
    for (count_hash_type::const_iterator it = result.begin(); it != result.end(); it++) {
      REMARK << it->first << "\t" << it->second << endl;
    }
    REMARK << "Finished HashCorrelationFinder3::computeSplitSearchAreas" << endl;
  }
  return result;
}

/** Counts the matching column pairs stored in resultBins, separately for each combination of common assemblies.
 *  Each pair is counted once (second position larger than first position). If the search was split in two
 *  parts, only pairs that span the two parts are counted.
 *  This method has to do the equivalent of the method getResults, otherwise the density estimation will be skewed;
 *  for that reason only a minimum stem length of 1 is supported.
 *
 *  @param searchMax column index at which the alignment is split in two parts; values larger than the
 *         number of result bins are clamped to that number, which corresponds to an unsplit search.
 *  @return hash that maps a common-assemblies hash to the number of counted column pairs.
 */
HashCorrelationFinder3::count_hash_type
HashCorrelationFinder3::getMatchPairCountHash3(length_type searchMax) const {
  count_hash_type result;
  const length_type binCount = static_cast<length_type>(resultBins->size());
  if (searchMax > binCount) {
    searchMax = binCount;
  }
  // after clamping, reaching the number of bins means that there is no second search region
  const bool unsplit = (searchMax >= binCount);
  length_type totLen = maf->getTotalLength();
  length_type aliIdMax = maf->size();
  if (searchMax < totLen) {
    aliIdMax = maf->getAlignmentId(searchMax);
  }
  // throw error; this method has do the equivalent to the method getResults, otherwise the density estimation will be skewed
  ERROR_IF(stemLengthMin != 1, "A minimum stem length other than 1 (option --stem) is currently not supported.");
  length_type pos = 0; // running internal column index, advanced together with the in-block index
  for (length_type i = 0; i < aliIdMax; ++i) {
    for (size_type i2 = 0; i2 < (*maf)[i].getLength(); ++i2, ++pos) {
      ERROR_IF(pos >= totLen, "Internal error in getMatchPairCountHash3");
      if ((*resultBins)[pos] == 0) {
        continue; // nothing was stored for this column
      }
      // convert to non-redundant set:
      const set<length_type> partners((*resultBins)[pos]->begin(), (*resultBins)[pos]->end());
      for (set<length_type>::const_iterator partner = partners.begin(); partner != partners.end(); ++partner) {
        const length_type pos2 = *partner;
        // count each covariation only once
        if (pos2 <= pos) {
          continue;
        }
        // in case of searching 2 disjunct regions, only count covariations that span the two regions
        const bool spansBothRegions = (pos < searchMax) && (pos2 >= searchMax);
        if (!unsplit && !spansBothRegions) {
          continue;
        }
        length_type aliId2 = maf->getAlignmentId(pos2);
        if (!isBlockPairSearchable(i, aliId2)) {
          if (verbose > 2) {
            REMARK << "MAF block pair corresponding to detected covariation was not searchable. " << (pos+1) << " (" << (i+1) << ")"
                   << (pos2 + 1) << "(" << (aliId2+1) << ")" << endl;
            REMARK << "Found covariation between columns with internal positions " << (pos+1) << " alignment " << (i+1)
                   << ") and " << (pos2+1) << " (alignment " << (aliId2+1) << " ) " << endl;
            if (searchMax < totLen) {
              REMARK << "Search was split into two disjunct areas." << endl;
            }
            REMARK << "Maximal index of search column of first search area: " << searchMax << " Corresponding maximum alignment id: " << aliIdMax << endl;
            REMARK << "First alignments: " << endl;
            (*maf)[i].writeFasta(cout);
            REMARK << "Second alignment:" << endl;
            (*maf)[aliId2].writeFasta(cout);
            const string unsearchableHash = maf->getCommonAssembliesHash(i, aliId2);
            REMARK << "Hash of common assemblies: " << unsearchableHash << endl;
            REMARK << "First column (raw): " << maf->getSlice(pos) << endl;
            REMARK << "Second column (raw): " << maf->getSlice(pos2) << endl;
            const set<string> sharedAssemblies = maf->getCommonAssemblies(i, aliId2);
            REMARK << "First column (using assemblies that are in common): " << maf->getSlice(pos, sharedAssemblies) << endl;
            REMARK << "Second column (using assemblies that are in common): " << maf->getSlice(pos2, sharedAssemblies) << endl;
          }
          continue; // ignore these cases, should be rare
        }
        const string hash = maf->getCommonAssembliesHash(i, aliId2);
        count_hash_type::iterator slot = result.find(hash);
        if (slot == result.end()) {
          result[hash] = 1;
        } else {
          slot->second += 1;
        }
      }
    }
  }
  return result;
}

/** Writes the contents of the results data structure. Further filtering will be applied later, but this output
 *  helps to estimate the density of hits.
 *  One line is written per column of the scanned alignment blocks: either
 *  "pos ( refpos,block,column ) : pos2 ( block2 ) hash ; ..." with one entry per stored hit,
 *  or "pos no hits." if nothing is stored for that column. All written numbers are one-based.
 *
 *  @param os output stream
 *  @param searchMax column index at which the alignment is split in two parts. Values larger than the number of
 *         result bins are clamped to that number. If searchMax is smaller than the total alignment length, only the
 *         alignment blocks before the block that contains searchMax are written.
 */
void
HashCorrelationFinder3::writeRawResults(ostream& os, length_type searchMax) const {
  if (searchMax > static_cast<length_type>(resultBins->size())) {
    searchMax = static_cast<length_type>(resultBins->size());
  }
  length_type totLen = maf->getTotalLength();
  length_type pos = 0; // running internal column index over all scanned blocks
  length_type aliIdMax = maf->size();
  if (searchMax < totLen) {
    aliIdMax = maf->getAlignmentId(searchMax);
  }
  for ( length_type i = 0; i < aliIdMax; ++i) {
    for ( size_type i2 = 0; i2 < (*maf)[i].getLength(); ++i2) {
      // the message names this method, so that a failure is not attributed to getMatchPairCountHash3
      ERROR_IF(pos >= totLen, "Internal error in writeRawResults");
      if ((*resultBins)[pos] != 0) {
	os << (pos+1) << " ( " << (maf->getAssemblyPosition(pos, maf->getRefAssembly()) + 1) << "," 
	   << (i+1) << "," << (i2+1) << " ) : ";
	for (result_vector_type::const_iterator jp = (*resultBins)[pos]->begin(); jp != (*resultBins)[pos]->end(); ++jp) {
	  length_type pos2 = *jp;
	  length_type aliId2 = maf->getAlignmentId(pos2);
	  string hash = maf->getCommonAssembliesHash(i, aliId2);
	  // the reference assembly position of the second column is intentionally not written
	  os << (pos2+1) << " ( "
	     << (aliId2+1) << " ) " << hash << " ; ";
	}
	os << endl;
      } else {
	os << (pos+1) << " no hits." << endl;
      }
      ++pos;
    }
  }
}

/** Compute densities of found covarying alignment columns. Assumes that search has been performed and is completed.
 *  The density of an assembly combination is the number of counted column pairs divided by its search area.
 *
 *  @param searchMax indicates where the MAF is split in two parts. If searchMax is zero or
 *         searchMax >= maf->getTotalLength(), the search areas are computed for a single region
 *         (only one original MAF was supplied).
 *  @param addEmpty if true, assembly combinations that have a search area but no counted pairs are added
 *         with a density that corresponds to one hit (1 / area), provided that the number of words of the hash
 *         is at least minNonGap. A non-positive area yields the density 1.0, as in augmentDensities.
 *  @param os if not NULL, one line "hash <tab> density <tab> count <tab> area" is written per stored density.
 *  @return hash that maps the assembly-combination hash to its density.
 */
HashCorrelationFinder3::double_hash_type 
HashCorrelationFinder3::computeDensities(length_type searchMax, bool addEmpty, ostream * os) const {
  if (verbose > 4) {
    REMARK << "Starting HashCorrelationFinder3::computeDensities" << endl;
  }
  // NOTE(review): searchMax == 0 selects the unsplit areas here, while getMatchPairCountHash3 appears to
  // treat any searchMax smaller than the total length as a split position - confirm that callers never
  // pass 0 for a non-empty alignment.
  count_hash_type areas;
  if ((searchMax == 0) || (searchMax >= maf->getTotalLength())) {
    areas = computeUnsplitSearchAreas();
  } else {
    areas = computeSplitSearchAreas(searchMax);
  }
  count_hash_type counts = getMatchPairCountHash3(searchMax);
  if (verbose > 4) {
    REMARK << "Found corelations for " << counts.size() << " distinct assembly combinations." << endl;
    REMARK << "Found " << areas.size() << " distinct searchable assembly combinations." << endl;
  }
  double_hash_type result;
  length_type pseudocount1 = 0; // corresponds to positive cases  
  length_type pseudocount2 = 0; // corresponds to negative cases
  for (count_hash_type::const_iterator it = counts.begin(); it != counts.end(); ++it) {
    string assembly = it->first;
    size_t count = (*it).second;
    // every counted combination must also have a search area
    count_hash_type::const_iterator areaIt = areas.find(assembly);
    ERROR_IF(areaIt == areas.end(), "Internal error in computeDensities: could not find assembly combination " + assembly);
    double area = areaIt->second;
    ERROR_IF(area <= 0.0, "Internal error in computeDensities: area less than zero encountered.");
    double density = (count + pseudocount1) / static_cast<double>(area + pseudocount1 + pseudocount2);
    if (verbose > 1) {
      REMARK << "Setting density of assembly combination to " << count << " / " << area << " = " << density << endl;
    }
    result[assembly] = density;
    if (os != NULL) {
      (*os) << assembly << "\t" << density << "\t" << count << "\t" << area << endl;
    }
  }
  if (addEmpty) {
    for (count_hash_type::const_iterator it = areas.begin(); it != areas.end(); ++it) {
      string areaHash = it->first;
      double area = it->second;
      if (result.find(areaHash) == result.end()) {
	// not found. Store instead the density that corresponds to a single hit
        size_t numCommon = findPositions(areaHash, '_').size() + 1; // count number of assembly "words"
        if (numCommon >= minNonGap) { // only write and store if number of common assemblies is larger or equal than minimum number of characters per column
	  size_t count = 0;
	  // use 1 instead of 0 hits; an empty search area must not lead to a division by zero,
	  // it is assigned the highest possible density instead
	  double density = (area > 0.0) ? (1.0 / area) : 1.0;
	  result[areaHash] = density;
          if (os != NULL) {
	    (*os) << areaHash << "\t" << density << "\t" << count << "\t" << area << endl;
	  } else {
            if (verbose > 4) {  
	      REMARK << "Estimated density: " << areaHash << "\t" << density << "\t" << count << "\t" << area << endl;
	    }
	  }
	}
      }
    }
  }
  if (verbose > 4) {
    REMARK << "Finished HashCorrelationFinder3::computeDensities" << endl;
  }
  return result;
}

/** Fills in densities for all searchable assembly combinations that are not yet part of the given hash
 *  (combinations for which no covariation was found in the shuffled alignments).
 *  Missing entries are set to 1 / area, or to 1.0 if the area is not positive. Existing entries are not modified.
 *
 *  @param result hash of densities that is augmented in place
 *  @param searchMax split position of the MAF; zero or a value >= maf->getTotalLength() selects the
 *         search areas of an unsplit alignment.
 */
void
HashCorrelationFinder3::augmentDensities(double_hash_type& result, length_type searchMax) const {
  const bool unsplit = (searchMax == 0) || (searchMax >= maf->getTotalLength());
  const count_hash_type areas = unsplit ? computeUnsplitSearchAreas() : computeSplitSearchAreas(searchMax);
  const double pseudocount = 1.0;
  for (count_hash_type::const_iterator entry = areas.begin(); entry != areas.end(); ++entry) {
    const string& key = entry->first;
    if (result.find(key) != result.end()) {
      continue; // a density is already known for this combination
    }
    const double area = entry->second;
    // not found: store pseudocount density; an empty area gets the highest possible density
    result[key] = (area <= 0.0) ? 1.0 : (pseudocount / area);
  }
}

/** Returns natural logarithm of  probability of a particular stem to be found at a particular position. Multiply with number of possible positions
 * to obtain E-value (either (N*(N-1))/2 for one MAF alignment (N == totalLength), or N*M for two MAF alignments */ 
double
HashCorrelationFinder3::computeLogPValue(const Stem& stem) const {
  double result = 0.0;
  // int verboseStore = getVerbose();
  // setVerbose(3);
  // verbose = 3;
  // find number of columns for two positions:
  length_type i = stem.getStart();
  length_type j = stem.getStop();
  ASSERT(i != j);
  length_type aliId1 = maf -> getAlignmentId(i);
  length_type aliId2 = maf -> getAlignmentId(j);
  /** Returns set of assemblies, that are in common between two alignments */
  set<string> commonAssemblies = maf->getCommonAssemblies(aliId1,aliId2);
  string slice1 = maf-> getSlice(i, commonAssemblies);
  string slice2 = maf-> getSlice(j, commonAssemblies);
  size_type highestSeqCount = 100; // findHighestSequenceCount(0, maf->getTotalLength());
  queue_type queue((highestSeqCount * (highestSeqCount-1))/2);
  Vec<string> assemblyVec(commonAssemblies.size());
  set<string>::const_iterator it2 = commonAssemblies.begin();
  for (Vec<string>::iterator it = assemblyVec.begin(); it != assemblyVec.end(); it++, it2++) {
    *it = *it2;
  }
  size_type n1 = 0;
  size_type n2 = 0;  
  if (complementMode) {
    n1 = searchMafColumn(NucleotideTools::dnaComplement(slice1), assemblyVec, i, queue).size();
    n2 = searchMafColumn(NucleotideTools::dnaComplement(slice2), assemblyVec, j, queue).size();
  } else {
    n1 = searchMafColumn(slice1, assemblyVec, i, queue).size();
    n2 = searchMafColumn(slice2, assemblyVec, j, queue).size();
  }
  // setVerbose(verboseStore); // set to previous value
  // verbose = verboseStore;
  if (n1 == 0) {
    cout << "# Strange error in slice-1 of p-value computation: " << stem << " " << slice1 << " " << slice2 << " " << assemblyVec << " : " << n1 << endl;
    return 0.0;
  }
  if (n2 == 0) {
    cout << "# Strange error in slice-2 of p-value computation: " << stem << " " << slice1 << " " << slice2 << " " << assemblyVec << " : " << n2 << endl;
    return 0.0;
  }
  ASSERT(n1 > 0); // if nothing complementary, how can it be part of a stem?
  ASSERT(n2 > 0);
  double logp1 = log(static_cast<double>(n1)/static_cast<double>(maf->getTotalLength()));
  double logp2 = log(static_cast<double>(n2)/static_cast<double>(maf->getTotalLength()));
  result = 0.5 * (logp1 + logp2); // corresonds to *geometric* mean of probabilities
  if (stem.getLength() > 1) {
    result += computeLogPValue(Stem(stem.getStart() + 1, stem.getStop()-1, stem.getLength()-1)); // recursive
  }
  return result;
}

/** Forward-match variant of computeLogPValue: returns the natural logarithm of the probability of a particular
 *  forward-matching stem to be found at a particular position. Multiply with the number of possible positions
 *  to obtain an E-value (either (N*(N-1))/2 for one MAF alignment (N == totalLength), or N*M for two MAF alignments).
 *
 *  In contrast to computeLogPValue, both positions advance in the same direction: the pairs of the stem are
 *  (start, stop), (start+1, stop+1), ... . Each pair contributes the geometric mean of the hit fractions
 *  of its two columns.
 *
 *  @return sum of the log-probabilities of all pairs. A pair contributes 0.0 (and ends the recursion) if one
 *          of its positions is outside of the alignment or if one of the two column searches returns no hit.
 */
double
HashCorrelationFinder3::computeForwardLogPValue(const Stem& stem) const {
  double result = 0.0;
  // find number of columns for two positions:
  length_type i = stem.getStart();
  length_type j = stem.getStop();
  ASSERT(i != j);
  // positions move towards the end of the alignment during recursion, so they can leave the valid range
  if ((i < 0) || (j < 0) || (i >= maf->getTotalLength()) || (j >= maf->getTotalLength())) {
    return 0.0;
  }
  length_type aliId1 = maf -> getAlignmentId(i);
  length_type aliId2 = maf -> getAlignmentId(j);
  // set of assemblies, that are in common between two alignments
  set<string> commonAssemblies = maf->getCommonAssemblies(aliId1,aliId2);
  string slice1 = maf-> getSlice(i, commonAssemblies);
  string slice2 = maf-> getSlice(j, commonAssemblies);
  size_type highestSeqCount = 100; // findHighestSequenceCount(0, maf->getTotalLength());
  queue_type queue((highestSeqCount * (highestSeqCount-1))/2);
  Vec<string> assemblyVec(commonAssemblies.size());
  set<string>::const_iterator it2 = commonAssemblies.begin();
  for (Vec<string>::iterator it = assemblyVec.begin(); it != assemblyVec.end(); it++, it2++) {
    *it = *it2;
  }
  size_type n1 = 0;
  size_type n2 = 0;  
  if (complementMode) {
    n1 = searchMafColumn(NucleotideTools::dnaComplement(slice1), assemblyVec, i, queue).size();
    n2 = searchMafColumn(NucleotideTools::dnaComplement(slice2), assemblyVec, j, queue).size();
  } else {
    n1 = searchMafColumn(slice1, assemblyVec, i, queue).size();
    n2 = searchMafColumn(slice2, assemblyVec, j, queue).size();
  }
  // without any hit no probability can be estimated for this pair
  if (n1 == 0) {
    cout << "# Strange error in slice-1 of p-value computation: " << stem << " " << slice1 << " " << slice2 << " " << assemblyVec << " : " << n1 << endl;
    return 0.0;
  }
  if (n2 == 0) {
    cout << "# Strange error in slice-2 of p-value computation: " << stem << " " << slice1 << " " << slice2 << " " << assemblyVec << " : " << n2 << endl;
    return 0.0;
  }
  double logp1 = log(static_cast<double>(n1)/static_cast<double>(maf->getTotalLength()));
  double logp2 = log(static_cast<double>(n2)/static_cast<double>(maf->getTotalLength()));
  result = 0.5 * (logp1 + logp2); // corresponds to *geometric* mean of probabilities
  if (stem.getLength() > 1) {
    // recursive; has to stay in the forward variant, otherwise the remaining pairs would be
    // evaluated as (start+k, stop-k) and without the range check above
    result += computeForwardLogPValue(Stem(stem.getStart() + 1, stem.getStop() + 1, stem.getLength() - 1));
  }
  return result;
}
