/*
 * Written by Bastien Chevreux (BaCh)
 *
 * Copyright (C) 1997-2000 by the German Cancer Research Center (Deutsches
 *   Krebsforschungszentrum, DKFZ Heidelberg) and Bastien Chevreux
 * Copyright (C) 2000 and later by Bastien Chevreux
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the 
 * Free Software Foundation, Inc., 
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 * 
 */

#include <iostream>

#include <boost/unordered_set.hpp> 
#include <boost/lexical_cast.hpp>

//#include <valgrind/callgrind.h>

//#include "readpool.H"
#include "io/generalio.H"
#include "mira/parameters.H"
#include "mira/assembly.H"
#include "mira/assembly_output.H"
#include "mira/maf_parse.H"
#include "caf/caf.H"

#include "version.H"


#ifndef lint
// Version control identification string ("$Id$" keyword placeholder).
// Left out of the build when `lint` is defined.
static char vcid[] = "$Id$";
#endif /* lint */


// 	$Id$	





// Keeps an optional, program-wide selection of names that is loaded from a
// file. All members are static, so there is one shared selection.
class General {
  // maps a name to the running number it received when it was first read
  typedef boost::unordered_map<std::string, size_t> strintmap;
  static strintmap GE_nameselectionmap;

public:
  // Fills the selection from the file 'filename'; exits the program if the
  // selection is empty afterwards.
  static void makeSelectionStringSet(string & filename);
  // true if no selection is loaded at all, or if 'name' is in the selection
  static bool checkNamePresence(string & name);
  // true if the selection contains at least one name
  static bool hasNames();
  // running number stored for 'name', or size_t(-1) if 'name' is not selected
  static size_t getNameOrder(const string & name);
};

// storage for the static selection map
General::strintmap General::GE_nameselectionmap;


// Loads the name selection from file 'filename' into GE_nameselectionmap.
//
// Entries are read with GeneralIO::readKeyValue(); only the key is used as
// name, the value part is ignored. Each name that is not yet in the map gets
// the next running number (counting from 0 within this call), so the stored
// number reflects the order of first appearance in the file.
//
// A file that cannot be opened is reported as fatal error via MIRANOTIFY.
// If the selection is still empty after reading, the program is terminated
// with exit code 10.
void General::makeSelectionStringSet(string & filename)
{
  FUNCSTART("void makeSelectionStringSet(string & filename)");

  ifstream fin;
  fin.open(filename.c_str(), ios::in);
  if(!fin){
    MIRANOTIFY(Notify::FATAL, "File not found: " << filename);
  }

  string elemname, dummy;
  uint32 numread=0;
  while(GeneralIO::readKeyValue(fin, elemname,dummy)){
    // insert() does not touch names that are already present, so one
    // lookup per name is enough
    if(GE_nameselectionmap.insert(strintmap::value_type(elemname,numread)).second){
      ++numread;
    }
  }
  fin.close();

  if(GE_nameselectionmap.empty()) {
    cerr << "No names found in selection file '" << filename << "', aborting.\n";
    exit(10);
  }

  FUNCEND();
}



// Tells whether 'name' passes the selection filter: when no selection has
// been loaded every name passes, otherwise only names in the selection do.
bool General::checkNamePresence(string & name)
{
  const bool nofilter = GE_nameselectionmap.empty();
  return nofilter || GE_nameselectionmap.count(name) != 0;
}

// Returns true when the selection map holds at least one name.
bool General::hasNames()
{
  const bool noselection = GE_nameselectionmap.empty();
  return !noselection;
}

size_t General::getNameOrder(const string & name)
{
  if(GE_nameselectionmap.empty()) return (0-1);
  strintmap::iterator nI=GE_nameselectionmap.find(name);
  if (nI == GE_nameselectionmap.end()) return (0-1);
  return nI->second;
}




// Command line tool "mirafa": main() loads a CAF file and registers
// cafload_callback(), which runs doit2() on the loaded contigs and writes
// them as CAF to the output stream.
// NOTE(review): presumably CAF::load() invokes the callback once per loaded
// contig -- confirm against the CAF class.
class mirafa
{
private:
  // output stream, static because it is used from the static callback chain
  static ofstream MFA_fout;

  static list<Contig> MFA_clist;   // needed for CAF conversion (and GBF)

private:
  // prints a short help text to stderr
  void usage();


  // adds rails to each contig, then runs hash analysis and buntifies reads
  static void doit2(list<Contig> & contigs, ReadPool & rp);
  // writes all contigs of 'clist' in CAF format to MFA_fout
  static void saveCList(list<Contig> & clist, ReadPool & rp);
  // callback handed to CAF::load(): process, save, then clear list and pool
  static void cafload_callback(list<Contig> & clist, ReadPool & rp);

public:
  // entry point of the tool, takes the usual argc / argv of a program
  int main(int argc, char ** argv);
};


// storage for the static members
list<Contig> mirafa::MFA_clist;
ofstream mirafa::MFA_fout;

// Prints the help text to stderr.
// The only option understood by mirafa::main() is -h (getopt string "+h"),
// so this is the only one advertised here.
void mirafa::usage()
{
  cerr << "mirafa\t(MIRALIB version " << MIRALIBVERSION << ")\n\n";
  cerr << "Usage:\n";
  cerr << "  mirafa [-h] cafin cafout [optional MIRA settings]\n\n";
  cerr << "Options:\n";
  cerr << "\t-h\t\tprint this help text\n";
}


// Streams every contig of 'clist' in CAF format to the output file
// MFA_fout. The readpool 'rp' is not used here.
void mirafa::saveCList(list<Contig> & clist, ReadPool & rp)
{
  Contig::setCoutType(Contig::AS_CAF);
  for(list<Contig>::iterator cI=clist.begin(); cI!=clist.end(); ++cI){
    MFA_fout << *cI;
  }
}

// Callback registered with CAF::load() in mirafa::main().
//
// Refreshes contig / readpool values after loading, prints the length of the
// last contig in 'clist' before and after doit2(), saves the contigs to the
// output file and finally empties both the contig list and the readpool.
//
// The accesses to clist.back() are guarded: calling back() on an empty list
// is undefined behaviour.
void mirafa::cafload_callback(list<Contig> & clist, ReadPool & rp)
{
  Assembly::refreshContigAndReadpoolValuesAfterLoading(rp,clist);

  // doit2() does not add or remove contigs, so one check is valid throughout
  const bool hascontig = !clist.empty();

  if(hascontig){
    clist.back().trashConsensusCache(false);
    cout << clist.back().getContigLength() << '\n';
  }

  doit2(clist,rp);

  if(hascontig){
    cout << clist.back().getContigLength() << '\n';
  }

  saveCList(clist, rp);

  clist.clear();
  rp.discard();
}

// Processes every contig of 'contigs' in turn:
//  - drops the cached consensus and adds rails to the contig
//  - flags every rail read in the readpool as used in assembly and valid
//  - makes sure the temporary directory "./tmp" exists
//  - runs a Skim hash analysis on the readpool and then
//    Assembly::buntifyReadsByHashFreq() with the same hash size
void mirafa::doit2(list<Contig> & contigs, ReadPool & rp)
{
  FUNCSTART("void mirafa::doit2(list<Contig> & contigs, ReadPool & rp)");
  cout << "Buntifying reads ..." << endl;

  string dir_tmp="./tmp";
  uint8 basesperhash=17;

  for(list<Contig>::iterator cI=contigs.begin(); cI!=contigs.end(); ++cI){
    cI->trashConsensusCache(false);
    cI->addRails(10000,0,"",false,"",false);

    for(uint32 readid=0; readid<rp.size(); ++readid){
      if(!rp[readid].isRail()) continue;
      rp[readid].setUsedInAssembly(true);
      rp[readid].setValidData(true);
    }

    // a true return value of ensureDirectory() is treated as failure
    if(ensureDirectory(dir_tmp,true)) {
      MIRANOTIFY(Notify::FATAL, "Could not make sure that directory '" << dir_tmp << "' exists, aborting.");
    }

    {
      // the Skim object stays alive while the reads are buntified
      Skim hashskim;

      hashskim.analyseHashes(dir_tmp,
                             rp,
                             false,
                             false,
                             false,
                             true,
                             basesperhash,
                             1,
                             1,
                             true);

      Assembly::buntifyReadsByHashFreq(basesperhash, rp);
    }
  }

  FUNCEND();
}



// Entry point of the mirafa tool.
//
// Command line: [-h] infile outfile [optional MIRA settings]
//  - -h (or any unknown option) prints the usage text and exits with code 0
//  - fewer than two file arguments: error message, usage, exit code 1
//  - all remaining arguments are handed to the MIRA parameter parser
//
// The input CAF file is then loaded with cafload_callback() registered as
// callback; the callback writes the results to the output file.
// Returns 0.
int mirafa::main(int argc, char ** argv)
{
  FUNCSTART("int main(int argc, char ** argv)");

  vector<MIRAParameters> Pv;
  MIRAParameters::setupStdMIRAParameters(Pv);

  Pv[0].setContigDisregardSpuriousRMBMismatches(false);

  ReadPool thepool(&Pv);

  extern int optind;

  // NOTE(review): the leading '+' is the GNU getopt request to stop option
  // parsing at the first non-option argument
  int c;
  while((c = getopt(argc, argv, "+h")) != -1){
    if(c == 'h' || c == '?'){
      usage();
      exit(0);
    }
  }

  if(argc-optind < 2) {
    cerr << argv[0] << ": " << "Missing at least infile or outfile as argument!\n";
    usage();
    exit(1);
  }

  string infile=argv[optind++];
  string outfile=argv[optind++];

  if(argc-optind > 0) {
    stringstream tss;
    for(int32 i=optind; i<argc; i++) tss << argv[i] << "  *=BEGIN0=*";
    MIRAParameters::parse(tss,Pv,NULL);
  }

  MIRAParameters::dumpAllParams(Pv, cout);

  MFA_fout.open(outfile.c_str(), ios::out);
  if(!MFA_fout){
    MIRANOTIFY(Notify::FATAL, "Could not open file for saving: " << outfile);
  }

  try{
    CAF tcaf(thepool, MFA_clist, &Pv);
    vector<uint32> dummy;
    tcaf.load(infile,
              Read::SEQTYPE_SANGER,
              1,
              dummy,
              false,
              mirafa::cafload_callback
      );
  }
  // catch by reference: no copy of the exception object, no slicing
  catch(Notify & n){
    n.handleError("main");
  }
  catch(Flow &){
    cerr << "Unexpected exception: Flow()\n";
  }

  MFA_fout.close();

  FUNCEND();
  return 0;
}













// Command line tool "tagsnp": main() loads a CAF file and registers
// cafload_callback(), which runs doit2() (repeat marking and feature tagging
// by consensus) on the loaded contigs and writes them as CAF to the output
// stream.
// NOTE(review): presumably CAF::load() invokes the callback once per loaded
// contig -- confirm against the CAF class.
class tagsnp
{
private:
  // output stream, static because it is used from the static callback chain
  static ofstream TS_fout;

  static list<Contig> TS_clist;   // needed for CAF conversion (and GBF)

private:
  // prints a short help text to stderr
  void usage();


  // marks possible repeats and consensus features in every contig
  static void doit2(list<Contig> & contigs);
  // writes all contigs of 'clist' in CAF format to TS_fout
  static void saveCList(list<Contig> & clist, ReadPool & rp);
  // callback handed to CAF::load(): process, save, then clear list and pool
  static void cafload_callback(list<Contig> & clist, ReadPool & rp);

public:
  // entry point of the tool, takes the usual argc / argv of a program
  int main(int argc, char ** argv);
};


// storage for the static members
list<Contig> tagsnp::TS_clist;
ofstream tagsnp::TS_fout;

// Prints the help text to stderr.
// The only option understood by tagsnp::main() is -h (getopt string "+h"),
// so this is the only one advertised here. The commented lines below list
// options which are currently not implemented.
void tagsnp::usage()
{
  cerr << "tagsnp\t(MIRALIB version " << MIRALIBVERSION << ")\n\n";
  cerr << "Usage:\n";
  cerr << "  tagsnp [-h] cafin cafout [optional MIRA settings]\n\n";
  cerr << "Options:\n";
  cerr << "\t-h\t\tprint this help text\n";
//  cerr << "\t-n\t\tnuke all existing SNP and RMB tags in file\n";
//  cerr << "\t-s <filename>\tload strain data from file\n";
//  cerr << "\t-a\t\tassume SNPs instead of repeats\n";
//  cerr << "\t-r <int>\tminimum reads per group (default: 1)\n";
//  cerr << "\t-q <int>\tminimum qual for tagging (default: 30)\n";
//  cerr << "\t-n <int>\tminimum neighbour qual for tagging (default: 20)\n";
//  cerr << "\t-e <int>\tend read exclusion area (default: 25)\n";
//  cerr << "\t-g\t\talso mark gap bases\n";
//  cerr << "\t-m\t\talso mark multicolumn gap bases\n";

}


//void tagsnp::save(string & cafout)
//{
//   
//  if(!cafout.empty()){
//    assout::saveAsCAF(contigs,cafout);
//  } else {
//    assout::dumpAsCAF(contigs,cout);
//  }
//
//  //
//  //filename="out.tcs";
//  //assout::saveAsTCS(contigs,filename);
//  
//  //filename="tagsnp_out.gap4da";
//  //assout::saveAsGAP4DA(contigs,filename);
//  
//  //filename="featureanalysis.txt";
//  //assout::saveFeatureAnalysis(400,100,contigs,readpool,
//  //				filename,
//  //				"featuresummary.txt",
//  //				"featureprot.txt");
//
//  //{
//  //  string filename="out.html";
//  //  cout << "Saving contigs to htmlfile: " << filename << endl;
//  //  ofstream out(filename.c_str(), ios::out | ios::trunc);
//  //  assout::dumpContigListAsHTML(contigs, "Super project", out);
//  //  out.close();
//  //}
//}

//void tagsnp::load (MIRAParameters * mp, string & cafin, string & strainin)
//{
//  cerr << "Loading project from CAF file: " << cafin << endl;
//
//  CAF tcaf(readpool, contigs, mp);
//  tcaf.load(cafin);
//
//  if(!strainin.empty()){
//    cerr << "Loading strain data";
//    readpool.loadStrainData(strainin);
//  }
//
//  Assembly::refreshContigAndReadpoolValuesAfterLoading(readpool,contigs);
//}

//void tagsnp::doit(list<Contig> & contigs)
//{
//  cout << "Tagging reads ..." << endl;
//  list<Contig>::iterator I=contigs.begin();
//  for(;I!=contigs.end(); I++){
//    //I->setParams(&P);
//    //
//    //uint32 numSRMB=0;
//    //uint32 numWRMB=0;
//    //uint32 numSNP=0;
//    //I->transposeReadSRMTagsToContig();
//    ////I->markPossibleRepeats(numSRMB, numWRMB, numSNP);
//    //vector<bool> readsmarkedsrm;
//    //I->newMarkPossibleRepeats(numSRMB,readsmarkedsrm);
//  }
//}


// Streams every contig of 'clist' in CAF format to the output file
// TS_fout. The readpool 'rp' is not used here.
void tagsnp::saveCList(list<Contig> & clist, ReadPool & rp)
{
  Contig::setCoutType(Contig::AS_CAF);
  for(list<Contig>::iterator cI=clist.begin(); cI!=clist.end(); ++cI){
    TS_fout << *cI;
  }
}

// Callback registered with CAF::load() in tagsnp::main().
//
// Refreshes contig / readpool values after loading, tags the contigs via
// doit2(), saves them to the output file and finally empties both the
// contig list and the readpool.
//
// The access to clist.back() is guarded: calling back() on an empty list is
// undefined behaviour.
void tagsnp::cafload_callback(list<Contig> & clist, ReadPool & rp)
{
  Assembly::refreshContigAndReadpoolValuesAfterLoading(rp,clist);
  if(!clist.empty()){
    clist.back().trashConsensusCache(false);
  }

  doit2(clist);
  saveCList(clist, rp);

  clist.clear();
  rp.discard();
}

// For every contig of 'contigs': drops the cached consensus, marks possible
// repeats and then marks features by consensus. The repeat statistics and
// the per-read marker vector filled on the way are not used any further.
void tagsnp::doit2(list<Contig> & contigs)
{
  cout << "Tagging reads ..." << endl;
  for(list<Contig>::iterator cI=contigs.begin(); cI!=contigs.end(); ++cI){
    Contig & actcon = *cI;
    actcon.trashConsensusCache(false);

    Contig::repeatmarker_stats_t repstats;
    vector<bool> readsmarkedsrm;
    actcon.newMarkPossibleRepeats(repstats, readsmarkedsrm);

    actcon.markFeaturesByConsensus(true,true,true);
  }
}



// Entry point of the tagsnp tool.
//
// Command line: [-h] infile outfile [optional MIRA settings]
//  - -h (or any unknown option) prints the usage text and exits with code 0
//  - fewer than two file arguments: error message, usage, exit code 1
//  - all remaining arguments are handed to the MIRA parameter parser
//
// The input CAF file is then loaded with cafload_callback() registered as
// callback; the callback writes the results to the output file.
// Returns 0.
int tagsnp::main(int argc, char ** argv)
{
  FUNCSTART("int main(int argc, char ** argv)");

  vector<MIRAParameters> Pv;
  MIRAParameters::setupStdMIRAParameters(Pv);

  Pv[0].setContigDisregardSpuriousRMBMismatches(false);

  ReadPool thepool(&Pv);

  extern int optind;

  // NOTE(review): the leading '+' is the GNU getopt request to stop option
  // parsing at the first non-option argument
  int c;
  while((c = getopt(argc, argv, "+h")) != -1){
    if(c == 'h' || c == '?'){
      usage();
      exit(0);
    }
  }

  if(argc-optind < 2) {
    cerr << argv[0] << ": " << "Missing at least infile or outfile as argument!\n";
    usage();
    exit(1);
  }

  string infile=argv[optind++];
  string outfile=argv[optind++];

  if(argc-optind > 0) {
    stringstream tss;
    for(int32 i=optind; i<argc; i++) tss << argv[i] << "  *=BEGIN0=*";
    MIRAParameters::parse(tss,Pv,NULL);
  }

  MIRAParameters::dumpAllParams(Pv, cout);

  TS_fout.open(outfile.c_str(), ios::out);
  if(!TS_fout){
    MIRANOTIFY(Notify::FATAL, "Could not open file for saving: " << outfile);
  }

  try{
    vector<uint32> dummy;
    CAF tcaf(thepool, TS_clist, &Pv);
    tcaf.load(infile,
              Read::SEQTYPE_SANGER,
              1,
              dummy,
              false,
              cafload_callback
      );
  }
  // catch by reference: no copy of the exception object, no slicing
  catch(Notify & n){
    n.handleError("main");
  }
  catch(Flow &){
    cerr << "Unexpected exception: Flow()\n";
  }

  TS_fout.close();

  FUNCEND();
  return 0;
}








// Implementation of the "convert_project" tool (see usage() for the user
// visible description). All configuration and working data is kept in
// static members so that the static load callbacks can reach it.
// NOTE(review): the mapping of command line switches to the CP_* members is
// done in code outside this part of the file -- confirm there before
// relying on a member's meaning.
class ConvPro 
{
private:
  static vector<MIRAParameters> CP_Pv;

  // source file type and list of requested target file types
  static string CP_fromtype;
  static list<string> CP_totype;

  // output streams; saveContigList_helper() expects this list to be either
  // empty or to have as many entries as CP_totype
  static list<ofstream *> CP_ofs;


  static string CP_infile;
  static string CP_outbasename;

  static string CP_renamesequences;

  static bool CP_splitcontigs2singlefiles;

  static bool CP_deletestaronlycolumns;
  static bool CP_blinddata;
  static bool CP_fillholesinstraingenomes;
  static bool CP_makecontigs;
  static bool CP_extractreadsinsteadcontigs;
  static bool CP_hardtrim;

  static bool CP_sortbyname;

  static bool CP_mustdeletetargetfiles;

  // when set, saveContigList_helper() runs specialTestCode() first
  static bool CP_specialtestcode;

  static base_quality_t CP_minqual;
  static char CP_recalcconopt;
  static char CP_recalcfeatureopt;

  static uint32 CP_minbasecoverage;

  static uint32 CP_mincontiglength;
  static bool   CP_minlengthisclipped;

  static uint32 CP_mincontigcoverage;
  static uint32 CP_minnumreads;
  
  static list<Contig> CP_clist;   // needed for CAF & MAF conversion (and GBF)
  static AssemblyInfo CP_assemblyinfo;

  static uint64 CP_readrenamecounter;

private:
  // prints the help text to stdout
  static void usage();
  // validates source and target file types, exits the program on error
  static void checkTypes(const string & fromtype,list<string> & totype);
  // wraps every read of the pool into a contig of its own and saves it
  static void putReadsInContigsAndSave(vector<MIRAParameters> & Pv, ReadPool & rp);
  // discards reads of the pool which are shorter than 'minlength'
  static void discardShortReads(vector<MIRAParameters> & Pv, ReadPool & rp, uint32 minlength, bool fromclipped);
  static void specialTestCode(list<Contig> & clist, ReadPool & rp);

  // ordering of contigs by General::getNameOrder() of their names
  static bool contig__nameordercomp(const Contig & a, const Contig & b);
  static void sortContigsByName(list<Contig> & clist);

  static void saveContigList(list<Contig> & clist, ReadPool & rp);
  static void saveContigList_helper(list<Contig> & clist, ReadPool & rp);

  static void saveReadPool(ReadPool & rp, list<ofstream *> & ofs);
  static void cafmafload_callback(list<Contig> & clist, ReadPool & rp);
  static void readpoolload_callback(ReadPool & rp);
  // builds "<basename>[_<contig or read name>]<postfix>"
  static string createFileNameFromBasePostfixContigAndRead(const string & basename, 
							   char * postfix, 
							   Contig * actcon = NULL, 
							   Read * actread = NULL);
  static uint32 openOFSlist(Contig * optcontig, list<ofstream *> & ofs);
  static void closeOFSList(uint32 howmany, list<ofstream *> & ofs);

public:
  // passes CP_ofs to closeOpenStreams()
  ~ConvPro();

  int main2(int argc, char ** argv);

  static void closeOpenStreams(list<ofstream *> & ofsl);
};

// Definitions of the static members together with their default values.

vector<MIRAParameters> ConvPro::CP_Pv;

string ConvPro::CP_fromtype="caf";
list<string> ConvPro::CP_totype;
list<ofstream *> ConvPro::CP_ofs;

string ConvPro::CP_infile;
string ConvPro::CP_outbasename;

string ConvPro::CP_renamesequences;

bool ConvPro::CP_splitcontigs2singlefiles=false;
bool ConvPro::CP_deletestaronlycolumns=false;
bool ConvPro::CP_blinddata=false;
bool ConvPro::CP_fillholesinstraingenomes=false;
bool ConvPro::CP_makecontigs=false;
bool ConvPro::CP_extractreadsinsteadcontigs=false;
bool ConvPro::CP_hardtrim=false;
bool ConvPro::CP_sortbyname=false;

bool ConvPro::CP_mustdeletetargetfiles=true;

bool ConvPro::CP_specialtestcode=false;

base_quality_t ConvPro::CP_minqual=0;
char ConvPro::CP_recalcconopt=' ';
char ConvPro::CP_recalcfeatureopt=' ';

uint32 ConvPro::CP_minbasecoverage=0;

uint32 ConvPro::CP_mincontiglength=0;
bool ConvPro::CP_minlengthisclipped=false;
uint32 ConvPro::CP_mincontigcoverage=1;
uint32 ConvPro::CP_minnumreads=0;

list<Contig> ConvPro::CP_clist;   // needed for CAF conversion (and GBF)
AssemblyInfo ConvPro::CP_assemblyinfo;

uint64 ConvPro::CP_readrenamecounter=1;


// On destruction, hands the static list of output streams (CP_ofs) to
// closeOpenStreams().
ConvPro::~ConvPro()
{
  ConvPro::closeOpenStreams(CP_ofs);
}

// Prints the complete help text of convert_project to stdout: program
// version, call syntax, source / target file types, all switches, the
// program name aliases and a few examples.
//
// Corrections made to the text: "maf" is described as loading from MAF (was
// CAF), the example for -A uses -A (was -a, which is the append switch),
// the -r note names "cCq" (was "cQq"), plus two spelling fixes.
void ConvPro::usage()
{
  cout << "convert_project\t(MIRALIB version " << MIRALIBVERSION << ")\n"
    "Author:  Bastien Chevreux\t(bach@chevreux.org)\n"
    "Purpose: convert assembly and sequencing file types.\n\n";
  cout << "Usage:\n"
    "convert_project [-f <fromtype>] [-t <totype> [-t <totype> ...]]\n"
    "\t[-aChimMsuZ]\n"
    "\t[-AcflnNoqrtvxXyz {...}]\n"
    "\t{infile} {basename_for_outfile(s)}\n\n";
  cout << "Options:\n";
  cout <<
    "\t-f <fromtype>\tload this type of project files, where fromtype is:\n"
    "\t   caf\t\t a complete assembly or single sequences from CAF\n"
    "\t   maf\t\t a complete assembly or single sequences from MAF\n"
    "\t   fasta\t sequences from a FASTA file\n"
    "\t   fastq\t sequences from a FASTQ file\n"
    "\t   gbf\t\t sequences from a GBF file\n"
    "\t   phd\t\t sequences from a PHD file\n"
    "\t   fofnexp\t sequences in EXP files from file of filenames\n";
  cout << "\t-t <totype>\twrite the sequences/assembly to this type (multiple\n"
    "\t\t\tmentions of -t are allowed):\n"
    "\t   ace\t\t sequences or complete assembly to ACE\n"
    "\t   caf\t\t sequences or complete assembly to CAF\n"
    "\t   maf\t\t sequences or complete assembly to MAF\n"
    "\t   exp\t\t sequences or complete assembly to EXP files in\n"
    "\t\t\t  directories. Complete assemblies are suited for gap4\n"
    "\t\t\t  import as directed assembly.\n"
    "\t   text\t\t complete assembly to text alignment (only when -f is\n"
    "\t\t\t  caf, maf or gbf)\n"
    "\t   html\t\t complete assembly to HTML (only when -f is caf, maf or\n"
    "\t\t\t  gbf)\n"
    "\t   fasta\t sequences or consensus to FASTA file (qualities to\n"
    "\t\t\t  .qual)\n"
    "\t   fastq\t sequences or consensus to FASTQ file\n"
    "\t   gbf\t\t sequences or consensus to GBF\n"
    "\t   tcs\t\t complete assembly to tcs\n"
    "\t   wig\t\t assembly coverage info to wiggle file\n"
    "\t   hsnp\t\t surrounding of SNP tags (SROc, SAOc, SIOc) to HTML\n"
    "\t\t\t (only when -f is caf, maf or gbf)\n"
    "\t   asnp\t\t analysis of SNP tags\n"
    "\t\t\t (only when -f is caf, maf or gbf)\n"
    "\t   cstats\t contig statistics file like from MIRA\n"
    "\t\t\t (only when source contains contigs)\n"
    "\t   crlist\t contig read list file like from MIRA\n"
    "\t\t\t (only when source contains contigs)\n"
    "\t   clippedfasta\t reads clipped to quality and sequencing vector to\n"
    "\t\t\t FASTA file (qualities to .qual)\n"
    "\t   maskedfasta\t reads where sequencing vector is masked out\n"
    "\t\t\t (with X) to FASTA file (qualities to .qual)\n"
    "\t   scaf\t\t sequences or complete assembly to single sequences CAF\n";

  cout << "\t-a\t\tAppend to target files instead of rewriting\n";

  cout <<
    "\t-A <string>\tString with MIRA parameters to be parsed\n"
    "\t\t\t Useful when setting parameters affecting consensus\n"
    "\t\t\t calling like -CO:mrpg etc.\n"
    "\t\t\t E.g.: -A \"454_SETTINGS -CO:mrpg=3\"\n";

  cout <<
    "\t-b\t\tBlind data\n"
    "\t\t\t Replaces all bases in reads/contigs with a 'c'\n";

  cout << "\t-C\t\tPerform hard clip to reads\n"
    "\t\t\t When reading formats which define clipping points, will\n"
    "\t\t\t  save only the unclipped part into the result file.\n"
    "\t\t\t Applies only to files/formats which do not contain\n"
    "\t\t\t  contigs.\n";

  cout <<
    "\t-d\t\t'Delete gap only columns'\n"
    "\t\t\t When output is contigs: delete columns that are\n"
    "\t\t\t  entirely gaps (like after having deleted reads during\n"
    "\t\t\t  editing in gap4 or similar)\n"
    "\t\t\t When output is reads: delete gaps in reads\n";

  cout <<
    "\t-m\t\tMake contigs (only for -t = caf or maf)\n"
    "\t\t\t Encase single reads as contig singlets into the CAF/MAF\n"
    "\t\t\t file.\n";
  cout <<
    "\t-n <filename>\twhen given, selects only reads or contigs given by\n"
    "\t\t\t name in that file.\n";
//  cout << "\t-i\t\twhen -n is used, inverts the selection\n";
  cout <<
    "\t-o\t\tfastq quality Offset (only for -f = 'fastq')\n"
    "\t\t\t Offset of quality values in FASTQ file. Default of 0\n"
    "\t\t\t tries to automatically recognise.\n";

  cout <<
    "\t-R <name>\tRename contigs/singlets/reads with given name string\n"
    "\t\t\t to which a counter is appended.\n"
    "\t\t\t Known bug: will create duplicate names if input\n"
    "\t\t\t  contains contigs/singlets as well as free reads, i.e.\n"
    "\t\t\t  reads not in contigs nor singlets.\n";

  // TODO: re-adapt these switches to >2.9.8
//  cout << "\t-s <filename>\twhen loading assemblies from files that do not contain\n";
//  cout << "\t\t\t strain information (e.g. CAF), load the strain\n";
//  cout << "\t\t\t information from this file. (2 columns, tab delimited:\n";
//  cout << "\t\t\t readname left, strain name right)\n";
//

//

  cout <<
    "\n\t--------------------------------------------------------\n"
    "\tThe following switches work only when input (CAF or MAF)\n"
    "\tcontains contigs. Beware: CAF and MAF can also contain\n"
    "\tjust reads.\n"
    "\t--------------------------------------------------------\n\n";

  // TODO: check if ok for >2.9.8
  cout <<
    "\t-M\t\tDo not extract contigs (or their consensus), but the\n"
    "\t\t\t  sequence of the reads they are composed of.\n";
  cout <<
    "\t-N <filename>\tlike -n, but sorts output according to order given\n"
    "\t\t\t in file. (works currently only for contigs)\n";
  cout <<
    "\t-r [cCqf]\tRecalculate consensus and / or consensus quality values\n"
    "\t\t\t and / or SNP feature tags.\n"
    "\t\t\t 'c' recalc cons & cons qualities (with IUPAC)\n"
    "\t\t\t 'C' recalc cons & cons qualities (forcing non-IUPAC)\n"
    "\t\t\t 'q' recalc consensus qualities only\n"
    "\t\t\t 'f' recalc SNP features\n"
    "\t\t\t Note: only the last of cCq is relevant, f works as a\n"
    "\t\t\t  switch and can be combined with cCq (e.g. \"-r C -r f\")\n"
    "\t\t\t Note: if the CAF/MAF contains multiple strains,\n"
    "\t\t\t recalculation of cons & cons qualities is forced, you\n"
    "\t\t\t  can just influence whether IUPACs are used or not.\n";
  cout <<
    "\t-s\t\tsplit output into multiple files instead of creating a\n"
    "\t\t\t single file\n";
  cout <<
    "\t-u\t\t'fillUp strain genomes'\n"
    "\t\t\t Fill holes in the genome of one strain (N or @)\n"
    "\t\t\t with sequence from a consensus of other strains\n"
    "\t\t\t Takes effect only with -r and -t gbf or fasta/q\n"
    "\t\t\t in FASTA/Q: bases filled up are in lower case\n"
    "\t\t\t in GBF: bases filled up are in upper case\n";

  cout <<
    "\t-q <integer>\tDefines minimum quality a consensus base of a strain\n"
    "\t\t\t must have, consensus bases below this will be 'N'\n"
    "\t\t\t Default: 0\n"
    "\t\t\t Only used with -r, and -f is caf/maf and -t is (fasta\n"
    "\t\t\t  or gbf)\n";
  cout <<
    "\t-v <integer>\tDefines minimum coverage a consensus base of a strain\n"
    "\t\t\t must have, bases with coverage below this will be 'N'\n"
    "\t\t\t Default: 0\n"
    "\t\t\t Only used with -r, and -t is (fasta\n"
    "\t\t\t  or gbf)\n";

  cout <<
    "\t-x <integer>\tMinimum contig or read length\n"
    "\t\t\t When loading, discard all contigs / reads with a\n"
    "\t\t\t length less than this value. Default: 0 (=switched off)\n"
    "\t\t\t Note: not applied to reads in contigs!\n";
  cout <<
    "\t-X <integer>\tSimilar to -x but applies only to reads and\n"
    "\t\t\t then to the clipped length.\n";

  cout <<
    "\t-y <integer>\tMinimum average contig coverage\n"
    "\t\t\t When loading, discard all contigs with an\n"
    "\t\t\t average coverage less than this value.\n"
    "\t\t\t Default: 1\n";

  cout <<
    "\t-z <integer>\tMinimum number of reads in contig\n"
    "\t\t\t When loading, discard all contigs with a\n"
    "\t\t\t number of reads less than this value.\n"
    "\t\t\t Default: 0 (=switched off)\n";


  cout <<
    "\t-l <integer>\twhen output as text or HTML: number of bases shown in\n"
    "\t\t\t one alignment line. Default: 60.\n"
    "\t-c <character>\twhen output as text or HTML: character used to pad\n"
    "\t\t\t endgaps. Default: ' ' (blank)\n";

  cout << "\nAliases:\n"
    "caf2html, exp2fasta, ... etc. Any combination of \"<validfromtype>2<validtotype>\"\ncan be used as program name (also using links) so as that convert_project\nautomatically sets -f and -t accordingly.\n";

  cout << "\nExamples:\n"
    "\tconvert_project -f caf -t fasta -t wig -t ace source.caf dest\n"
    "\tconvert_project -f caf -t caf -x 2000 -y 10 source.caf dest\n"
    "\tcaf2html -l 100 -c . source.caf dest\n";
}


void ConvPro::checkTypes(const string & fromtype,list<string> & totype)
{
  if(!(fromtype=="caf"
       || fromtype=="maf"
       || fromtype=="phd"
       || fromtype=="gbf"
       || fromtype=="fasta"
       || fromtype=="fastq"
       || fromtype=="fofnexp"
       )){
    usage();
    cout << endl;
    cerr << "Unknown or illegal file type '" << fromtype << "' defined as <fromtype>\n";
    exit(1);
  }
  if(CP_totype.empty()){
    CP_totype.push_back(fromtype);
  }
  for(list<string>::iterator ttI= CP_totype.begin(); ttI!=CP_totype.end(); ++ttI){
    if(!(*ttI=="fasta"
	 || *ttI=="fastq"
	 || *ttI=="clippedfasta"
	 || *ttI=="maskedfasta"
	 || *ttI=="caf"
	 || *ttI=="maf"
	 || *ttI=="ace"
	 || *ttI=="scaf"
	 || *ttI=="exp"
	 || *ttI=="gbf"
	 || *ttI=="tcs"
	 || *ttI=="text"
	 || *ttI=="txt"
	 || *ttI=="html"
	 || *ttI=="wiggle"
	 || *ttI=="wig"
	 || *ttI=="asnp"
	 || *ttI=="hsnp"
	 || *ttI=="cstats"
	 || *ttI=="crlist"
	 )){
      usage();
      cout << endl;
      cerr << "Unknown or illegal file type '" << *ttI << "' defined as <totype>\n";
      exit(1);
    }
    //if(*ttI=="html"){
    //  cerr << "SORRY: HTML output is currently de-activated\n";
    //  exit(1);
    //}
  }

}




// Experimental processing which saveContigList_helper() runs when
// CP_specialtestcode is set. For every contig of 'clist':
//  - edits tricky 454 / Solexa overcalls and prints the returned count
//  - marks repeats (the result of that call is not used)
//  - marks features by consensus
//  - calls editSingleDiscrepancyNoHAFTag() with the marker vector filled
//    by the steps before
// The readpool 'rp' is not used here.
void ConvPro::specialTestCode(list<Contig> & clist, ReadPool & rp)
{
  for(list<Contig>::iterator cI=clist.begin(); cI!=clist.end(); ++cI){
    Contig & actcon = *cI;
    vector<bool> readsmarkedsrm;

    cout << "\nMarking tricky 454 / Solexa overcalls in temporary contig.\n";
    cout << "Marked " << actcon.editTrickyOvercalls(true,false,readsmarkedsrm) << " reads.\n";
    Assembly::markRepeats(actcon, readsmarkedsrm);

    actcon.markFeaturesByConsensus(true,true,true);
    actcon.editSingleDiscrepancyNoHAFTag(readsmarkedsrm);
  }
}



// Saves every read of the readpool as a contig of its own.
//
// A read without qualities gets the backbone base quality from the assembly
// parameters of Pv[0], provided that value is greater than 0. The read is
// then added as first read to a fresh contig, which is placed into CP_clist,
// saved via saveContigList_helper() and removed from CP_clist again.
void ConvPro::putReadsInContigsAndSave(vector<MIRAParameters> & Pv, ReadPool & rp)
{
  for(uint32 readid=0; readid<rp.size(); ++readid) {
    const bool needsquals = !rp[readid].hasQuality()
      && Pv[0].getAssemblyParams().as_backbone_basequals>0;
    if(needsquals) {
      rp[readid].setQualities(Pv[0].getAssemblyParams().as_backbone_basequals);
    }

    Contig singlet(&Pv, rp);
    CP_clist.push_back(singlet);
    CP_clist.back().addFirstRead(readid,1);
    saveContigList_helper(CP_clist, rp);
    CP_clist.clear();
  }
}

// Discards every read of the readpool whose length is below 'minlength'.
// With 'fromclipped' set the clipped sequence length is compared, otherwise
// the length of the complete sequence. 'Pv' is not used here.
void ConvPro::discardShortReads(vector<MIRAParameters> & Pv, ReadPool & rp, uint32 minlength, bool fromclipped)
{
  for(uint32 readid=0; readid<rp.size(); ++readid) {
    const uint32 len = fromclipped ? rp[readid].getLenClippedSeq()
                                   : rp[readid].getLenSeq();
    if(len>=minlength) continue;
    rp[readid].discard();
  }
}



//string ConvPro::createFileNameFromBasePostfixContigAndRead(const string & basename, string & postfix, Contig * actcon, Read * actread)
// Builds a file name of the form  <basename>[_<name>]<postfix>.
//
// <name> is the name of the contig 'actcon' if that pointer is not NULL,
// else the name of the read 'actread' if that one is not NULL, else the
// name part is left out. The '_' separator is only inserted when 'basename'
// is not empty.
string ConvPro::createFileNameFromBasePostfixContigAndRead(const string & basename, char * postfix, Contig * actcon, Read * actread)
{
  string result(basename);

  // a contig takes precedence over a read
  const bool usecontig = (actcon != NULL);
  const bool useread = !usecontig && (actread != NULL);

  if(usecontig || useread){
    if(!result.empty()) result+='_';
    if(usecontig){
      result+=actcon->getContigName();
    }else{
      result+=actread->getName();
    }
  }

  result+=postfix;
  return result;
}


// Comparison function for sorting contigs: 'a' comes before 'b' if the
// number General::getNameOrder() returns for the name of 'a' is smaller
// than the one for the name of 'b'.
bool ConvPro::contig__nameordercomp(const Contig & a, const Contig & b)
{
  const size_t ordera = General::getNameOrder(a.getContigName());
  const size_t orderb = General::getNameOrder(b.getContigName());
  return ordera < orderb;
}


// Sorts the contig list in place, using contig__nameordercomp() as
// ordering criterion.
void ConvPro::sortContigsByName(list<Contig> & clist)
{
  clist.sort(&ConvPro::contig__nameordercomp);
}


/*
 * Writes the contigs in 'clist' once for every output type in CP_totype.
 *
 * CP_ofs is walked in lockstep with CP_totype; only the "caf" and "maf"
 * types use their stream, all other types hand a file name to the
 * respective assout:: function.
 *
 * File names:
 *  - without CP_splitcontigs2singlefiles: <base><postfix>
 *  - with CP_splitcontigs2singlefiles   : <base>_<contigname><postfix>
 *    For the list-based writers the name of the first contig in the list
 *    is used. The CAF and MAF writers open one file per contig and
 *    therefore use the name of the contig currently being written (using
 *    the first contig there would make every contig truncate the same
 *    file).
 *
 * "scaf" and "fastaqual" are accepted but do nothing here ("fasta" also
 * writes the quality file). An unknown type terminates the program.
 *
 * After the first call in non-split mode, CP_mustdeletetargetfiles is
 * reset to false.
 */
void ConvPro::saveContigList_helper(list<Contig> & clist, ReadPool & rp)
{
  FUNCSTART("void ConvPro::saveContigList_helper(list<Contig> & clist, ReadPool & rp)");

  if(CP_specialtestcode) specialTestCode(clist,rp);

  BUGIFTHROW(!CP_ofs.empty() && CP_ofs.size() != CP_totype.size(), "Ooops? !CP_ofs.empty() && CP_ofs.size() != CP_totype.size() ???");

  // contig whose name goes into the file names of the list-based writers;
  //  NULL means "no contig name in the file name"
  Contig * namecon=NULL;
  if(CP_splitcontigs2singlefiles && !clist.empty()) namecon=&clist.front();

  list<ofstream *>::iterator ofsI= CP_ofs.begin();
  list<string>::iterator ttI= CP_totype.begin();
  for(; ttI!=CP_totype.end(); ++ttI, ++ofsI){
    if(*ttI=="scaf"){
      // nothing to do
    }else if(*ttI=="hsnp"){
      MIRAParameters::generateProjectOutNames(CP_Pv,CP_outbasename);
      string fn(createFileNameFromBasePostfixContigAndRead(
                  CP_Pv[0].getAssemblyParams().as_outfile_stats_snpenvironment,
                  ".html",
                  namecon));
      assout::saveSNPSurroundingAsHTML(clist,fn,CP_mustdeletetargetfiles);
    }else if(*ttI=="cstats"){
      MIRAParameters::generateProjectOutNames(CP_Pv,CP_outbasename);
      string fn(createFileNameFromBasePostfixContigAndRead(
                  CP_Pv[0].getAssemblyParams().as_outfile_stats_contigstats,
                  ".txt",
                  namecon));
      assout::saveStatistics(clist,fn,CP_mustdeletetargetfiles);
    }else if(*ttI=="crlist"){
      MIRAParameters::generateProjectOutNames(CP_Pv,CP_outbasename);
      string fn(createFileNameFromBasePostfixContigAndRead(
                  CP_Pv[0].getAssemblyParams().as_outfile_stats_crlist,
                  ".txt",
                  namecon));
      assout::saveContigReadList(clist,fn,CP_mustdeletetargetfiles);
    }else if(*ttI=="asnp"){
      MIRAParameters::generateProjectOutNames(CP_Pv,CP_outbasename);
      string fn(createFileNameFromBasePostfixContigAndRead(
                  CP_Pv[0].getAssemblyParams().as_outfile_stats_snpanalysis,
                  ".txt",
                  namecon));
      string fa(createFileNameFromBasePostfixContigAndRead(
                  CP_Pv[0].getAssemblyParams().as_outfile_stats_featureanalysis,
                  ".txt",
                  namecon));
      string fs(createFileNameFromBasePostfixContigAndRead(
                  CP_Pv[0].getAssemblyParams().as_outfile_stats_featuresummary,
                  ".txt",
                  namecon));
      string fc(createFileNameFromBasePostfixContigAndRead(
                  CP_Pv[0].getAssemblyParams().as_outfile_stats_featuresequences,
                  ".txt",
                  namecon));

      assout::saveSNPList(clist,fn,CP_mustdeletetargetfiles);
      assout::saveFeatureAnalysis(clist,rp,
                                  fa,fs,fc,
                                  CP_mustdeletetargetfiles);
    }else if(*ttI=="fasta"){
      //CALLGRIND_START_INSTRUMENTATION;
      string bn(createFileNameFromBasePostfixContigAndRead(
                  CP_outbasename,
                  "",
                  namecon));
      assout::saveStrainsAsFASTAQ(clist,
                                  rp,
                                  bn,
                                  false,
                                  CP_minbasecoverage,
                                  CP_minqual,
                                  CP_mustdeletetargetfiles,
                                  CP_fillholesinstraingenomes);
    }else if(*ttI=="fastaqual"){
      // fastaqual is "do-nothing" as "fasta" also write fastaqual here!
    }else if(*ttI=="fastq"){
      //CALLGRIND_START_INSTRUMENTATION;
      string bn(createFileNameFromBasePostfixContigAndRead(
                  CP_outbasename,
                  "",
                  namecon));
      assout::saveStrainsAsFASTAQ(clist,
                                  rp,
                                  bn,
                                  true,
                                  CP_minbasecoverage,
                                  CP_minqual,
                                  CP_mustdeletetargetfiles,
                                  CP_fillholesinstraingenomes);
    } else if(*ttI=="caf"){
      Contig::setCoutType(Contig::AS_CAF);
      for(list<Contig>::iterator I=clist.begin(); I!=clist.end(); ++I){
        if(CP_recalcfeatureopt=='f') I->markFeaturesByConsensus(true,true,true);
        if(CP_recalcfeatureopt=='r') {
          vector<bool> dummy;
          Assembly::markRepeats(*I,dummy);
        }
        // a stream opened here is closed again right after this contig
        bool mustclose=false;
        if(!(*ofsI)->is_open()){
          // split mode: one file per contig, named after that very contig
          Contig * fncon=NULL;
          if(CP_splitcontigs2singlefiles) fncon=&(*I);
          string bn(createFileNameFromBasePostfixContigAndRead(
                      CP_outbasename,
                      ".caf",
                      fncon));
          (*ofsI)->open(bn.c_str(), ios::out);
          mustclose=true;
        }
        *(*ofsI) << *I;
        if(mustclose){
          (*ofsI)->close();
        }
      }
    } else if(*ttI=="maf"){
      Contig::setCoutType(Contig::AS_MAF);
      for(list<Contig>::iterator I=clist.begin(); I!=clist.end(); ++I){
        if(CP_recalcfeatureopt=='f') I->markFeaturesByConsensus(true,true,true);
        if(CP_recalcfeatureopt=='r') {
          vector<bool> dummy;
          Assembly::markRepeats(*I,dummy);
        }
        if(!(*ofsI)->is_open()){
          // split mode: one file per contig, named after that very contig
          Contig * fncon=NULL;
          if(CP_splitcontigs2singlefiles) fncon=&(*I);
          string bn(createFileNameFromBasePostfixContigAndRead(
                      CP_outbasename,
                      ".maf",
                      fncon));
          (*ofsI)->open(bn.c_str(), ios::out);
        }
        *(*ofsI) << *I;
        if(CP_splitcontigs2singlefiles){
          (*ofsI)->close();
        }
      }
    } else if(*ttI=="html"){
      string bn(createFileNameFromBasePostfixContigAndRead(
                  CP_outbasename,
                  ".html",
                  namecon));
      assout::dumpContigListAsHTML(clist, bn, CP_mustdeletetargetfiles, CP_outbasename);
    } else if(*ttI=="text"
              || *ttI=="txt"){
      string bn(createFileNameFromBasePostfixContigAndRead(
                  CP_outbasename,
                  ".txt",
                  namecon));
      assout::saveAsTXT(clist, bn, CP_mustdeletetargetfiles);
    } else if(*ttI=="exp"){
      // outbasename is in this case a directory name
      string bn(createFileNameFromBasePostfixContigAndRead(
                  CP_outbasename,
                  "",
                  namecon));
      assout::saveAsGAP4DA(clist,bn,CP_mustdeletetargetfiles);
    } else if(*ttI=="gbf"){
      // outbasename is in this case the basename name
      string bn(createFileNameFromBasePostfixContigAndRead(
                  CP_outbasename,
                  "",
                  namecon));
      assout::saveStrainsAsGBF(clist,
                               rp,
                               bn,
                               CP_minqual,
                               CP_fillholesinstraingenomes,
                               CP_mustdeletetargetfiles);
    } else if(*ttI=="ace"){
      // outbasename is in this case the basename name
      string bn(createFileNameFromBasePostfixContigAndRead(
                  CP_outbasename,
                  ".ace",
                  namecon));
      assout::saveAsACE(clist,bn,CP_mustdeletetargetfiles);
    } else if(*ttI=="tcs"){
      // outbasename is in this case the basename name
      string bn(createFileNameFromBasePostfixContigAndRead(
                  CP_outbasename,
                  ".tcs",
                  namecon));
      assout::saveAsTCS(clist,bn,CP_mustdeletetargetfiles);
    } else if(*ttI=="wiggle" || *ttI=="wig"){
      // outbasename is in this case the basename name
      string bn(createFileNameFromBasePostfixContigAndRead(
                  CP_outbasename,
                  ".wig",
                  namecon));
      assout::saveAsWiggle(clist,bn,CP_mustdeletetargetfiles);
    } else {
      cerr << "\n\n-t " << *ttI << " is not a valid 'to' type when converting contigs (sorry). But maybe something went wrong, please contact the author.\n";
      exit(1);
    }
  }

  // in non-split mode, only the very first call may delete target files
  if(!CP_splitcontigs2singlefiles){
    CP_mustdeletetargetfiles=false;
  }

  FUNCEND();
}

/*
 * Filters the contig list according to the user supplied criteria and
 * saves what is left via saveContigList_helper().
 *
 * A contig is removed from 'clist' if
 *  - CP_mincontiglength > 0 and the contig is shorter, or
 *  - CP_mincontigcoverage > 0 and its average coverage is lower, or
 *  - CP_minnumreads > 0 and it has fewer reads, or
 *  - General::checkNamePresence() returns false for its name.
 *
 * If at least one contig survives, the optional post-processing steps
 * (removal of star-only columns, blinding, trashing of the consensus
 * cache) are applied, the contig statistics are stored in CP_assemblyinfo
 * and the list is saved. Nothing is written if no contig is left.
 */
void ConvPro::saveContigList(list<Contig> & clist, ReadPool & rp)
{
  // delete contigs which should not be output
  // TODO:
  //  would generally be better to have that in some loading callback (and
  //  would work for contigs well enough), but readpool mechanisms would
  //  not at the moment, too primitive
  list<Contig>::iterator cI=clist.begin();
  while(cI != clist.end()){
    bool conout=true;

    if(CP_mincontiglength>0
       && cI->getContigLength() < CP_mincontiglength){
      conout=false;
    } else {
      Contig::constats_t constats(cI->getStats());

      if(CP_mincontigcoverage>0
         && constats.avg_coverage < CP_mincontigcoverage){
        conout=false;
      } else if(CP_minnumreads>0
                && constats.total_reads < CP_minnumreads){
        conout=false;
      }
    }

    if(conout){
      string cname(cI->getContigName());
      if(!General::checkNamePresence(cname)){
        conout=false;
      }
    }

    // erase() returns the iterator to the element after the erased one, so
    //  the iterator must only be advanced by hand when nothing was erased.
    //  (Erasing and then incrementing skips an element and, on a list that
    //  just became empty, increments end().)
    if(conout){
      ++cI;
    }else{
      cI=clist.erase(cI);
    }
  }

  // every contig still in the list passed the filters
  if(clist.empty()) return;

  for(cI=clist.begin(); cI != clist.end(); ++cI){
    if(CP_deletestaronlycolumns) {
      cI->deleteStarOnlyColumns(0,cI->getContigLength());
    }
    if(CP_blinddata){
      cI->blindContig();
    }
  }

  Assembly::refreshContigAndReadpoolValuesAfterLoading(rp,clist);

  // TODO: ! make autoconfigure: on several strains, this is needed!
  //  else, let user define via switch
  for(cI=clist.begin(); cI != clist.end(); ++cI){
    if(CP_recalcconopt=='c'
       || CP_recalcconopt=='C'){
      cI->trashConsensusCache(false);
    }
    if(CP_recalcconopt=='q'){
      cI->trashConsensusCache(true);
    }

    CP_assemblyinfo.storeContigStats(cI->getStats());
  }

  // save the list that was filtered and post-processed above
  saveContigList_helper(clist, rp);
}

/*
 * Applies the read-level options to all reads of the pool and then writes
 * the pool.
 *
 * Processing order:
 *  1. CP_deletestaronlycolumns: remove gaps from each read
 *  2. CP_blinddata: blind the sequence data of each read
 *  3. name selection present: discard reads for which
 *     General::checkNamePresence() returns false
 *  4. CP_hardtrim: hard trim each read
 *  5. CP_mincontiglength > 0: discard short reads
 *  6. CP_renamesequences set: rename reads to <prefix>_<running number>
 *
 * Output: with CP_makecontigs every read is put into a contig of its own
 * and saved as contig. Otherwise the pool is dumped once per type in
 * CP_totype to the stream at the same position in 'ofs'; each of these
 * streams must already be open.
 */
void ConvPro::saveReadPool(ReadPool & rp, list<ofstream *> & ofs)
{
  FUNCSTART("void ConvPro::saveReadPool(ReadPool & rp, list<ofstream *> & ofs)");

  if(CP_deletestaronlycolumns) {
    for(uint32 ri=0; ri<rp.size(); ++ri) {
      rp.getRead(ri).removeGapsFromRead();
    }
  }

  if(CP_blinddata) {
    for(uint32 ri=0; ri<rp.size(); ++ri) {
      rp.getRead(ri).blindSeqData('c');
    }
  }

  if(General::hasNames()){
    for(uint32 ri=0; ri<rp.size(); ++ri) {
      string readname(rp[ri].getName());
      if(General::checkNamePresence(readname)) continue;
      rp[ri].discard();
    }
  }

  if(CP_hardtrim){
    for(uint32 ri=0; ri<rp.size(); ++ri) {
      rp[ri].performHardTrim();
    }
  }

  if(CP_mincontiglength>0){
    discardShortReads(CP_Pv,rp,CP_mincontiglength,CP_minlengthisclipped);
  }

  if(!CP_renamesequences.empty()){
    string newname;
    for(uint32 ri=0; ri<rp.size(); ++ri){
      newname=CP_renamesequences+"_"+boost::lexical_cast<std::string>(CP_readrenamecounter++);
      rp[ri].setName(newname);
    }
  }

  if(!CP_makecontigs) {
    list<ofstream *>::iterator streamI= ofs.begin();
    for(list<string>::iterator typeI= CP_totype.begin();
        typeI!=CP_totype.end();
        ++typeI, ++streamI){
      const string & totype=*typeI;
      // the list holds pointers to streams, hence the double dereference
      ofstream & out=*(*streamI);

      BUGIFTHROW(!out.is_open(), totype << " file stream not open???");

      if(totype=="fasta"){
        rp.dumpAs(out,Read::AS_FASTA,false);
      } else if(totype=="fastaqual"){
        rp.dumpAs(out,Read::AS_FASTAQUAL,false);
      } else if(totype=="maskedfasta"){
        rp.dumpAs(out,Read::AS_MASKEDMASKFASTA,false);
      } else if(totype=="maskedfastaqual"){
        rp.dumpAs(out,Read::AS_MASKEDMASKFASTAQUAL,false);
      } else if(totype=="fastq"){
        rp.dumpAs(out,Read::AS_FASTQ,false);
      } else if(totype=="caf" || totype=="scaf" ){
        rp.dumpAs(out,Read::AS_CAF,false);
      } else if(totype=="maf"){
        rp.dumpAs(out,Read::AS_MAF,false);
      } else {
        cout.flush();
        cerr << "\n\n-t " << totype << " is not a valid type for saving a readpool (internal)!\n";
        //usage();
        exit(1);
      }
    }
  }else{
    putReadsInContigsAndSave(CP_Pv, rp);
  }

  FUNCEND();
}

/*
 * Appends one newly allocated ofstream per entry of CP_totype to 'ofs'
 * and opens it for the types that can be written as read pool
 * (fasta, fastaqual, maskedfasta, maskedfastaqual, fastq, caf/scaf, maf).
 * Streams for other types are appended but left closed.
 *
 * File names are built from CP_outbasename, the name of 'optcontig'
 * (if given) and the postfix belonging to the type.
 *
 * Returns the number of streams that were opened.
 */
uint32 ConvPro::openOFSlist(Contig * optcontig, list<ofstream *> & ofs)
{
  FUNCSTART("uint32 ConvPro::openOFSlist(Contig * optcontig, list<ofstream *> & ofs)");
  BUGIFTHROW(CP_totype.empty(), " CP_totype.empty() ???");

  uint32 numopened=0;

  for(list<string>::iterator typeI= CP_totype.begin(); typeI!=CP_totype.end(); ++typeI){
    const string & totype=*typeI;
    ofs.push_back(new ofstream);

    // work out the file name; types not handled here get no file
    string filename;
    bool handled=true;
    if(totype=="fasta" || totype=="maskedfasta"){
      filename=createFileNameFromBasePostfixContigAndRead(CP_outbasename,".fasta",optcontig);
    } else if(totype=="fastaqual" || totype=="maskedfastaqual"){
      filename=createFileNameFromBasePostfixContigAndRead(CP_outbasename,".fasta.qual",optcontig);
    } else if(totype=="fastq"){
      filename=createFileNameFromBasePostfixContigAndRead(CP_outbasename,".fastq",optcontig);
    } else if(totype=="caf" || totype=="scaf" ){
      filename=createFileNameFromBasePostfixContigAndRead(CP_outbasename,".caf",optcontig);
    } else if(totype=="maf"){
      filename=createFileNameFromBasePostfixContigAndRead(CP_outbasename,".maf",optcontig);
    } else {
      handled=false;
    }

    if(handled){
      ofs.back()->open(filename.c_str(), ios::out);
      ++numopened;
    }
  }
  return numopened;
}

/*
 * Deletes the last 'howmany' streams of 'ofs' and removes their pointers
 * from the list. Deleting an ofstream also closes its file.
 *
 * 'howmany' must not be larger than the number of streams in the list.
 */
void ConvPro::closeOFSList(uint32 howmany, list<ofstream *> & ofs)
{
  // function name for diagnostics: must match the signature above
  FUNCSTART("void ConvPro::closeOFSList(uint32 howmany, list<ofstream *> & ofs)");
  BUGIFTHROW(howmany>ofs.size(),"howmany>ofs.size() ???");
  for(uint32 i=0; i<howmany; ++i){
    delete ofs.back();
    ofs.pop_back();
  }
  FUNCEND();
}


/*
 * Handles one batch of loaded contigs together with their reads.
 *
 * - With CP_renamesequences set, every contig gets an empty name and
 *   CP_renamesequences as name prefix.
 * - If there are contigs and reads are not to be extracted instead, the
 *   contigs are saved.
 * - Otherwise the read pool is saved: in split mode (and with at least
 *   one contig) to freshly opened files named after the first contig,
 *   else to the global output streams.
 *
 * At the end the read name container is trashed, the contig list is
 * cleared and the read pool is discarded.
 */
void ConvPro::cafmafload_callback(list<Contig> & clist, ReadPool & rp)
{
  if(!CP_renamesequences.empty()){
    for(list<Contig>::iterator conI=clist.begin(); conI != clist.end(); ++conI){
      conI->setContigName("");
      conI->setContigNamePrefix(CP_renamesequences);
    }
  }

  const bool savereadsonly = clist.empty() || CP_extractreadsinsteadcontigs;
  if(savereadsonly){
    // streams local to this call, only filled in split mode
    list<ofstream *> localofs;
    uint32 numtoclose=0;
    if(CP_splitcontigs2singlefiles && !clist.empty()){
      numtoclose=openOFSlist(&clist.front(),localofs);
      saveReadPool(rp,localofs);
    }else{
      saveReadPool(rp,CP_ofs);
    }
    closeOFSList(numtoclose,localofs);
  }else{
    saveContigList(clist,rp);
  }

  Read::trashReadNameContainer();
  clist.clear();
  rp.discard();
}

/*
 * Handles a loaded read pool: saves it to the global output streams
 * (CP_ofs), then trashes the read name container and discards the pool.
 */
void ConvPro::readpoolload_callback(ReadPool & rp)
{
  // TODO: check if needed (slows loading by ~30 to 50%
//  rp.makeTemplateIDs(false);
//  rp.makeStrainIDs(false);

  saveReadPool(rp,CP_ofs);

  Read::trashReadNameContainer();
  rp.discard();
}


/*
 * Deletes all streams in 'ofsl' (which also closes their files) and
 * empties the list afterwards.
 *
 * Emptying the list makes sure no dangling pointers are left behind, so
 * calling this function a second time on the same list is harmless.
 */
void ConvPro::closeOpenStreams(list<ofstream *> & ofsl)
{
  list<ofstream *>::iterator ofsI= ofsl.begin();
  for(; ofsI!=ofsl.end(); ++ofsI){
    delete *ofsI;
  }
  ofsl.clear();
}

/*
 * Entry point of the project conversion tool.
 *
 * Steps:
 *  1. If the program name has the form <from>2<to>, take both types from
 *     it as defaults.
 *  2. Parse the command line options, then expect exactly two remaining
 *     arguments: the input file and the base name for the output.
 *  3. Set up the MIRA parameters and one output stream per 'to' type
 *     (opened here unless contigs are split into single files).
 *  4. Load the input; saving is done from the load callbacks or, for the
 *     types loaded without callback, directly afterwards.
 *
 * Returns 0 when the conversion went through. Usage errors and invalid
 * types terminate the program via exit().
 */
int ConvPro::main2(int argc, char ** argv)
{
  //CALLGRIND_STOP_INSTRUMENTATION;

  FUNCSTART("int main2(int argc, char ** argv)");

  int c;
  extern char *optarg;
  extern int optind;

  base_quality_t fqqualoffset=0;

  string strainfile="";

  int32 linelen=60;
  char  endgap_fillchar=' ';

  string namefile="";
  bool keepnamesfromfile=true;

  string path;
  string convertprog;
  splitFullPathAndFileName(argv[0],path,convertprog);

  // derive default from/to types from a program name like "caf2fasta"
  {
    std::transform(convertprog.begin(),
                   convertprog.end(),
                   convertprog.begin(),
                   (int(*)(int))std::tolower); // now, that's what I call ugly
    string sep="2";
    string::size_type seppos=string::npos;
    seppos=convertprog.find_first_of(sep,0);
    if(seppos!=string::npos){
      CP_fromtype=convertprog.substr(0, seppos);
      CP_totype.push_back(convertprog.substr(seppos+1,100));
    }
  }

  string miraparams;

  //"CZihumMsl:r:c:f:t:s:q:n:N:v:x:X:y:z:o:a:"
  const char pstring[]=
    "abCdhimMsuZ"
    "A:c:f:l:n:N:o:q:r:R:t:v:x:X:y:z:";

  while (1){
    c = getopt(argc, argv, pstring);
    if(c == -1) break;

    switch (c) {
    case 'a': {
      CP_mustdeletetargetfiles=false;
      break;
    }
    case 'A': {
      miraparams=optarg;
      break;
    }
    case 'C': {
      CP_hardtrim=true;
      break;
    }
    case 'f': {
      CP_fromtype=optarg;
      break;
    }
    case 't': {
      CP_totype.push_back(optarg);
      break;
    }
    case 's': {
      CP_splitcontigs2singlefiles=true;
      break;
    }
    case 'r': {
      // every character of the argument selects one recalculation option
      string rrr=optarg;
      for(size_t si=0; si<rrr.size(); si++){
        switch(rrr[si]){
        case 'c' :
        case 'C' :
        case 'q' : {
          CP_recalcconopt=rrr[si];
          break;
        }
        case 'f' :
        case 'r' : {
          CP_recalcfeatureopt=rrr[si];
          break;
        }
        default : {
          cerr << "ERROR: -r must be one of c, C, q, f, r\n";
          usage();
          exit(1);
        }
        }
      }
      break;
    }
    case 'R': {
      CP_renamesequences=optarg;
      break;
    }
    case 'c': {
      string egfc=optarg;
      if(egfc.size()!=1){
        usage();
        cout << endl;
        cerr << "ERROR: -c must be a single character\n";
        exit(1);
      }
      endgap_fillchar=egfc[0];
      break;
    }
    case 'q': {
      CP_minqual=atoi(optarg);
      if(CP_minqual >100) {
        usage();
        cout << endl;
        cerr << "ERROR: -q must be <= 100\n";
        exit(1);
      }
      break;
    }
    case 'v': {
      CP_minbasecoverage=atoi(optarg);
      break;
    }
    case 'x': {
      CP_mincontiglength=atoi(optarg);
      break;
    }
    case 'X': {
      CP_mincontiglength=atoi(optarg);
      CP_minlengthisclipped=true;
      break;
    }
    case 'y': {
      CP_mincontigcoverage=atoi(optarg);
      break;
    }
    case 'z': {
      CP_minnumreads=atoi(optarg);
      break;
    }
    case 'o': {
      fqqualoffset=atoi(optarg);
      break;
    }
    case 'l': {
      linelen=atoi(optarg);
      // a line length of 0 is rejected, too
      if(linelen <= 0) {
        usage();
        cout << endl;
        cerr << "ERROR: -l must be >0\n";
        exit(1);
      }
      break;
    }
    case 'u': {
      CP_fillholesinstraingenomes=true;
      break;
    }
    case 'm': {
      // -m and -M exclude each other, the last one given wins
      CP_makecontigs=true;
      CP_extractreadsinsteadcontigs=false;
      break;
    }
    case 'M': {
      CP_extractreadsinsteadcontigs=true;
      CP_makecontigs=false;
      break;
    }
    case 'd': {
      CP_deletestaronlycolumns=true;
      break;
    }
    case 'b': {
      CP_blinddata=true;
      break;
    }
    case 'n': {
      namefile=optarg;
      break;
    }
    case 'N': {
      namefile=optarg;
      CP_sortbyname=true;
      break;
    }
    case 'i': {
      // option is switched off: the code after exit() is not reached
      cerr << "SORRY: -" << static_cast<char>(c) << " is currently de-activated\n";
      exit(1);
      keepnamesfromfile=false;
      break;
    }
    case 'Z': {
      CP_specialtestcode=true;
      break;
    }
    case 'h':
    case '?': {
      usage();
      exit(0);
    }
    default : {}
    }
  }

  if(argc-optind < 1) {
    usage();
    cout << endl;
    cerr << argv[0] << ": " << "Missing infile and out-basename as arguments!\n";
    exit(1);
  }

  if(argc-optind < 2) {
    usage();
    cout << endl;
    cerr << argv[0] << ": " << "Missing either infile or out-basename as arguments!\n";
    exit(1);
  }

  if(argc-optind > 2) {
    usage();
    cout << endl;
    cerr << argv[0] << ": " << "Whoops, found more than infile and out-basename as arguments left on the command line!\n";
    cerr << "Unparsed command line: ";
    for(;optind<argc;optind++) cerr <<argv[optind] << " ";
    cerr << endl;
    exit(1);
  }

  CP_infile=argv[optind++];
  CP_outbasename=argv[optind];

  if(CP_infile=="--help"){
    usage();
    exit(0);
  }

  checkTypes(CP_fromtype,CP_totype);

  MIRAParameters::setupStdMIRAParameters(CP_Pv);
  if(!miraparams.empty()){
    cout << "Parsing special MIRA parameters: " << miraparams << endl;
    MIRAParameters::parse(miraparams.c_str(),CP_Pv);
    cout << "Ok.\n";
  }

  CP_Pv[0].setContigAlignmentOutputTextLineLen(linelen);
  CP_Pv[0].setContigAlignmentOutputHTMLLineLen(linelen);
  CP_Pv[0].setContigAlignmentOutputTextGapPad(endgap_fillchar);
  CP_Pv[0].setContigAlignmentOutputHTMLGapPad(endgap_fillchar);

  for(uint32 i=0; i< CP_Pv.size(); i++){
    CP_Pv[i].setAssemblyFASTQQualOffset(fqqualoffset);
  }

  ReadPool thepool(&CP_Pv);

  CP_assemblyinfo.setLargeContigSize(CP_mincontiglength);
  CP_assemblyinfo.setLargeTotalCov(CP_mincontigcoverage);

  if(CP_recalcconopt=='C'){
    for(uint32 i=0; i< CP_Pv.size(); i++){
      CP_Pv[i].setContigForceNonIUPAC(true,true);
    }
  }

  if(!namefile.empty()){
    General::makeSelectionStringSet(namefile);
  }

  // Create one stream per 'to' type. Note that "fasta" and "maskedfasta"
  //  append their quality counterpart to CP_totype while the list is being
  //  walked: push_back() on a list keeps the iterator valid, so the
  //  appended type is visited by this very loop as well.
  cout << "Loading from " << CP_fromtype << ", saving to:";
  ofstream * ofstmp;
  for(list<string>::iterator ttI= CP_totype.begin(); ttI!=CP_totype.end(); ++ttI){
    ofstmp=new ofstream;
    CP_ofs.push_back(ofstmp);
    cout << ' ' << *ttI;
    if(*ttI=="fasta"){
      if(!CP_splitcontigs2singlefiles){
        CP_ofs.back()->open(createFileNameFromBasePostfixContigAndRead(CP_outbasename,".fasta").c_str(), ios::out);
      }
      CP_totype.push_back("fastaqual");
    } else if(*ttI=="fastaqual"){
      if(!CP_splitcontigs2singlefiles){
        CP_ofs.back()->open(createFileNameFromBasePostfixContigAndRead(CP_outbasename,".fasta.qual").c_str(), ios::out);
      }
    } else if(*ttI=="maskedfasta"){
      if(!CP_splitcontigs2singlefiles){
        CP_ofs.back()->open(createFileNameFromBasePostfixContigAndRead(CP_outbasename,".fasta").c_str(), ios::out);
      }
      CP_totype.push_back("maskedfastaqual");
    } else if(*ttI=="maskedfastaqual"){
      if(!CP_splitcontigs2singlefiles){
        CP_ofs.back()->open(createFileNameFromBasePostfixContigAndRead(CP_outbasename,".fasta.qual").c_str(), ios::out);
      }
    } else if(*ttI=="fastq"){
      if(!CP_splitcontigs2singlefiles){
        CP_ofs.back()->open(createFileNameFromBasePostfixContigAndRead(CP_outbasename,".fastq").c_str(), ios::out);
      }
    } else if(*ttI=="caf" || *ttI=="scaf" ){
      if(!CP_splitcontigs2singlefiles){
        CP_ofs.back()->open(createFileNameFromBasePostfixContigAndRead(CP_outbasename,".caf").c_str(), ios::out);
      }
    } else if(*ttI=="maf"){
      if(!CP_splitcontigs2singlefiles){
        CP_ofs.back()->open(createFileNameFromBasePostfixContigAndRead(CP_outbasename,".maf").c_str(), ios::out);
      }
    } else if(*ttI=="hsnp"){
      // the types below get a stream that is left closed here
    } else if(*ttI=="asnp"){
    } else if(*ttI=="cstats"){
    } else if(*ttI=="crlist"){
    } else if(*ttI=="html"){
    } else if(*ttI=="text"){
    } else if(*ttI=="txt"){
    } else if(*ttI=="exp"){
    } else if(*ttI=="gbf"){
    } else if(*ttI=="ace"){
    } else if(*ttI=="tcs"){
    } else if(*ttI=="wiggle" || *ttI=="wig"){
    } else {
      cout.flush();
      cerr << "\n\n-t " << *ttI << " is not a handled 'to' type\n";
      //usage();
      exit(1);
    }
  }
  cout << '\n';

  try{
    if(CP_fromtype=="caf" || CP_fromtype=="maf") {
      // When sorting by name, everything is loaded first (no callback)
      //  and then sorted and saved in one go.
      void (*usecallback)(list<Contig> &, ReadPool &) = cafmafload_callback;
      if(CP_sortbyname){
        usecallback=NULL;
      }

      if(CP_fromtype=="caf") {
        CAF tcaf(thepool, CP_clist, &CP_Pv);
        vector<uint32> dummy;
        tcaf.load(CP_infile.c_str(),
                  Read::SEQTYPE_SANGER,
                  1,
                  dummy,
                  false,
                  usecallback
          );
      }else if(CP_fromtype=="maf") {
        MAFParse mafp(thepool, CP_clist, &CP_Pv);
        vector<uint32> dummy;
        mafp.load(CP_infile.c_str(),
                  Read::SEQTYPE_SANGER,
                  1,
                  dummy,
                  false,
                  usecallback
          );
      }
      if(usecallback==NULL){
        sortContigsByName(CP_clist);
        cafmafload_callback(CP_clist,thepool);
      }
    }else{
      uint32 dummy=0;
      if(CP_fromtype=="fasta"){
        cout << "Loading data from FASTA ...";
        thepool.loadDataFromFASTA(CP_infile,2, dummy, false, CP_infile+".qual",false,Read::SEQTYPE_SANGER,false,readpoolload_callback);
      } else if(CP_fromtype=="fastq"){
        cout << "Loading data from FASTQ ...";
        thepool.loadDataFromFASTQ(CP_infile,2, dummy,false,Read::SEQTYPE_SANGER,false,readpoolload_callback);
      } else if(CP_fromtype=="gbf") {
        cout << "Loading data from GBF ...";
        thepool.loadDataFromGBF(CP_infile);
        saveReadPool(thepool,CP_ofs);
      } else if(CP_fromtype=="fofnexp"){
        cout << "Loading data from EXP in file of filenames ...";
        thepool.loadEXPs(CP_infile,1, dummy);
        thepool.loadQualitiesFromSCF(false, false,"/dev/null","/dev/null");
        saveReadPool(thepool,CP_ofs);
      } else {
        cerr << "\n\n-f " << CP_fromtype << " is not a valid from type! (simple pool)\n";
        //usage();
        exit(1);
      }
      cout << " done.\n";
    }
  }
  catch(Notify & n){
    // Need to close by hand as handleError() will perform a hard exit
    closeOpenStreams(CP_ofs);
    n.handleError("main");
  }
  catch(Flow f){
    // NOTE(review): execution continues after this handler and the
    //  success message below is still printed -- confirm this is wanted
    cerr << "Unexpected exception: Flow()\n";
  }
  catch(...){
    cout.flush();
    cerr.flush();
    cerr << "Unknown exception caught, aborting the process.\n\nPlease contact: bach@chevreux.org\n\n";
    abort();
  }

  cout << "\nData conversion process finished, no obvious errors encountered.\n";

  FUNCEND();
  return 0;
}




/*
 * Baiting tool: loads sequences, checks each read against the bait data
 * held in MB_skim and writes the selected reads in the requested formats.
 *
 * All state is kept in static members; the destructor closes the output
 * streams.
 */
class MiraBait 
{
private:


  static vector<MIRAParameters> MB_Pv;

  static string MB_fromtype;            // input type (-f)
  static list<string> MB_totype;        // output types (-t, may be given several times)

  static list<ofstream *> MB_ofs;       // output streams, walked in parallel to MB_totype


  static string MB_baitfile;
  static string MB_infile;
  static string MB_outbasename;

  static bool MB_deletestaronlycolumns; // set by -d
  static bool MB_inversehit;            // set by -i: keep the reads that do not hit the bait
  static bool MB_fwdandrev;             // cleared by -r (no check of reverse complement direction)
  static uint32 MB_numbaithits;         // min. number of bait hits a read needs (-n)

  static bool MB_mustdeletetargetfiles;

  static list<Contig> MB_clist;   // needed for CAF conversion (and GBF)

  static Skim MB_skim;                  // performs the bait check of a read

private:
  static void usage();
  static void checkTypes(string & fromtype,list<string> & totype);
  static void putReadsInContigsAndSave(vector<MIRAParameters> & Pv, ReadPool & rp);
  static void specialTestCode(list<Contig> & clist, ReadPool & rp);

  static void saveReadPool(ReadPool & rp);
  static void cafmafload_callback(list<Contig> & clist, ReadPool & rp);
  static void readpoolload_callback(ReadPool & rp);

public:
  ~MiraBait();

  int main(int argc, char ** argv);

};

// Definitions (and default values) of the static members of MiraBait

vector<MIRAParameters> MiraBait::MB_Pv;

string MiraBait::MB_fromtype="fastq";
list<string> MiraBait::MB_totype;
list<ofstream *> MiraBait::MB_ofs;

string MiraBait::MB_infile;
string MiraBait::MB_baitfile;
string MiraBait::MB_outbasename;

bool   MiraBait::MB_deletestaronlycolumns=false;
bool   MiraBait::MB_mustdeletetargetfiles=true;
bool   MiraBait::MB_inversehit=false;
bool   MiraBait::MB_fwdandrev=true;
uint32 MiraBait::MB_numbaithits=1;

list<Contig> MiraBait::MB_clist;   // needed for CAF conversion (and GBF)

Skim MiraBait::MB_skim;


/*
 * Releases the output streams of the baiting tool via
 * ConvPro::closeOpenStreams().
 */
MiraBait::~MiraBait()
{
  ConvPro::closeOpenStreams(MB_ofs);
}

/*
 * Prints the command line help of mirabait to stdout.
 *
 * The option letters in the synopsis line are the ones explained in the
 * option list below it (-i, -k, -n, -o, -r).
 */
void MiraBait::usage()
{
  cout << "mirabait\t(MIRALIB version " << MIRALIBVERSION << ")\n";
  cout << "Author: Bastien Chevreux\t(bach@chevreux.org)\n\n";

  cout << "... baiting ...\n";
  cout << "Usage:\n";
  //cout << "\tconvert_project [-f <fromtype>] [-t <totype>] [-s strainfile] [-q] infile outfile\n\n";
  cout << "mirabait [-f <fromtype>] [-t <totype> [-t <totype> ...]] [-iknor] baitfile infile <basename_for_outfile(s)>\n\n";
  cout << "Options:\n";
  cout << "\t-f <fromtype>\tload this type of project files, where fromtype is:\n"
    "\t   caf\t\t sequences from CAF\n"
    "\t   maf\t\t sequences from MAF\n"
    "\t   phd\t\t sequences from a PHD file\n"
    "\t   gbf\t\t sequences from a GBF file\n"
    "\t   fasta\t sequences from a FASTA file\n"
    "\t   fastq\t sequences from a FASTQ file\n";
  cout << "\t-t <totype>\twrite the sequences to this type (multiple mentions\n"
    "\t\t\tof -t are allowed):\n"
    "\t   fasta\t sequences to FASTA file\n"
    "\t   fastq\t sequences to FASTQ file\n"
    "\t   caf\t\t sequences to CAF\n"
    "\t   maf\t\t sequences to MAF\n";

  cout << "\n"
    "\t-k\t\tk-mer, length of bait in bases (<32, default=31)\n"
    "\t-n\t\tMin. number of k-mer baits needed (default=1)\n"
    "\t-i\t\tInverse hit: writes only sequences that do not hit bait\n"
    "\t-r\t\tNo checking of reverse complement direction\n";

  cout << "\n"
    "\t-o\t\tfastq quality Offset (only for -f = 'fastq')\n"
    "\t\t\t Offset of quality values in FASTQ file. Default: 33\n"
    "\t\t\t A value of 0 tries to automatically recognise.\n";



//  cout << "\t-a <string>\tString with MIRA parameters to be parsed\n"
//    "\t\t\t Useful when setting parameters affecting consensus\n"
//    "\t\t\t calling like -CO:mrpg etc.\n"
//    "\t\t\t E.g.: -a \"454_SETTINGS -CO:mrpg=3\"\n";

  cout << "\nExamples:\n"
    "\t...\n"
    "\t...\n";
}


/*
 * Validates the input type and the list of output types, filling in
 * defaults where nothing was given.
 *
 * fromtype: empty is replaced by "fastq"; anything but caf, maf, phd,
 *           gbf, exp, fasta or fastq prints the usage and terminates.
 * totype  : if empty, the input type is used as output type when it is
 *           caf, maf, fasta or fastq, else "fastq". "scaf" entries are
 *           rewritten to "caf". Anything but fasta, fastq, caf or maf
 *           prints the usage and terminates.
 *
 * Works on the list passed as 'totype' (not on the static member
 * directly), so that the parameter is actually honoured.
 */
void MiraBait::checkTypes(string & fromtype,list<string> & totype)
{
  if(fromtype.empty()){
    fromtype="fastq";
  }
  if(!(fromtype=="caf"
       || fromtype=="maf"
       || fromtype=="phd"
       || fromtype=="gbf"
       || fromtype=="exp"
       || fromtype=="fasta"
       || fromtype=="fastq"
       )){
    usage();
    cout << endl;
    cerr << "Unknown or illegal file type '" << fromtype << "' defined as <fromtype>\n";
    exit(1);
  }
  if(totype.empty()){
    if(fromtype=="caf"
       || fromtype=="maf"
       || fromtype=="fasta"
       || fromtype=="fastq"
      ){
      totype.push_back(fromtype);
    }else{
      totype.push_back("fastq");
    }
  }
  for(list<string>::iterator ttI= totype.begin(); ttI!=totype.end(); ++ttI){
    if(*ttI=="scaf") *ttI="caf";
    if(!(*ttI=="fasta"
         || *ttI=="fastq"
         || *ttI=="caf"
         || *ttI=="maf"
         )){
      usage();
      cout << endl;
      cerr << "Unknown or illegal file type '" << *ttI << "' defined as <totype>\n";
      exit(1);
    }
  }
}

// Note: discards the reads that are not selected by the bait check, then
//  saves the pool. The pool itself is not cleared here.
/*
 * Applies the bait check to every read of the pool and writes the pool
 * once per output type in MB_totype, using the stream at the same
 * position in MB_ofs.
 *
 * A read counts as "baited" if MB_skim.checkBaitHit() returns at least
 * MB_numbaithits for it. In normal mode the reads that are not baited
 * are discarded, in inverse mode (MB_inversehit) the baited ones are.
 *
 * An output type that cannot be written as read pool terminates the
 * program.
 */
void MiraBait::saveReadPool(ReadPool & rp)
{
  // bait check: discard whatever is not wanted in the output
  for(uint32 readid=0; readid<rp.size(); ++readid){
    const bool baited = MB_skim.checkBaitHit(rp[readid]) >= MB_numbaithits;
    if(baited == MB_inversehit){
      rp[readid].discard();
    }
  }

  // then save the read pool
  list<ofstream *>::iterator streamI= MB_ofs.begin();
  for(list<string>::iterator typeI= MB_totype.begin();
      typeI!=MB_totype.end();
      ++typeI, ++streamI){
    const string & totype=*typeI;

    // the list holds pointers to streams, hence the double dereference below
    if(totype=="fasta"){
      rp.dumpAs(*(*streamI),Read::AS_FASTA,false);
    } else if(totype=="fastaqual"){
      rp.dumpAs(*(*streamI),Read::AS_FASTAQUAL,false);
    } else if(totype=="fastq"){
      rp.dumpAs(*(*streamI),Read::AS_FASTQ,false);
    } else if(totype=="caf" || totype=="scaf" ){
      rp.dumpAs(*(*streamI),Read::AS_CAF,false);
    } else if(totype=="maf"){
      rp.dumpAs(*(*streamI),Read::AS_MAF,false);
    } else {
      cout.flush();
      cerr << "\n\n-t " << totype << " is not a valid type when the source file does not contain a full assembly!\n";
      //usage();
      exit(1);
    }
  }
}


// Callback handed to CAF::load() and MAFParse::load() by MiraBait::main().
//
// Refreshes contig / read pool values, writes the reads of rp via
//  saveReadPool() and afterwards throws away what was loaded: the read name
//  container, the contig list and the contents of the read pool.
// NOTE(review): when and how often the loaders call this is not visible
//  here; presumably once per loaded portion to keep memory usage low.
void MiraBait::cafmafload_callback(list<Contig> & clist, ReadPool & rp)
{
  // TODO: check if needed
  Assembly::refreshContigAndReadpoolValuesAfterLoading(rp,clist);

  // bait filtering and output happen in there
  saveReadPool(rp);

  // everything was written, drop the loaded data
  Read::trashReadNameContainer();
  clist.clear();
  rp.discard();
}

// Callback handed to ReadPool::loadDataFromFASTA() / loadDataFromFASTQ()
//  by MiraBait::main().
//
// Writes the reads of rp via saveReadPool() and afterwards throws away the
//  read name container and the contents of the read pool.
// NOTE(review): when and how often the loaders call this is not visible
//  here; presumably once per loaded portion to keep memory usage low.
void MiraBait::readpoolload_callback(ReadPool & rp)
{
  // TODO: check if needed (slows loading by ~30 to 50%)
//  rp.makeTemplateIDs(false);
//  rp.makeStrainIDs(false);

  // bait filtering and output happen in there
  saveReadPool(rp);

  // everything was written, drop the loaded data
  Read::trashReadNameContainer();
  rp.discard();
}



// Entry point of the "mirabait" tool.
//
// Command line: [options] baitfile infile out-basename
//   -a <string>  extra MIRA parameters, parsed into MB_Pv
//   -f <type>    type of infile (MB_fromtype)
//   -t <type>    output type, may be given several times (MB_totype)
//   -n <int>     value a read's bait hit count is compared against
//   -k <int>     bases per hash, values above 31 are set to 31
//   -o <int>     FASTQ quality offset (default 33)
//   -d           sets MB_deletestaronlycolumns
//   -i           inverse hit mode (MB_inversehit)
//   -r           switches off MB_fwdandrev
//   -h / -?      print usage and exit with 0
//
// Work flow: parse the options, check the types, load the baits from the
//  FASTA file and hand them to MB_skim, open one output file per output
//  type (out-basename plus type specific extension), then load infile.
//  The reads are filtered and written by saveReadPool(), either via the
//  load callbacks or directly (GBF).
//
// Returns 0 on success. Wrong usage, an invalid type or an output file
//  which cannot be opened lead to exit(1); an unexpected Flow exception
//  to exit(100); Notify exceptions are given to Notify::handleError().
//
// NOTE(review): the output streams are not closed in here on the success
//  path; presumably the owner of MB_ofs takes care of that elsewhere.
int MiraBait::main(int argc, char ** argv)
{
  //CALLGRIND_STOP_INSTRUMENTATION;

  FUNCSTART("int main(int argc, char ** argv)");

  int c;
  extern char *optarg;
  extern int optind;


  base_quality_t fqqualoffset=33;

  string path;
  string convertprog;
  splitFullPathAndFileName(argv[0],path,convertprog);

  string miraparams;

  uint8 basesperhash=31;

  while (1){
    c = getopt(argc, argv, "hdirf:t:o:a:k:n:");
    if(c == -1) break;

    switch (c) {
    case 'a': {
      miraparams=optarg;
      break;
    }
    case 'f': {
      MB_fromtype=optarg;
      break;
    }
    case 'n': {
      MB_numbaithits=atoi(optarg);
      break;
    }
    case 'k': {
      // the hash size must fit into a uint8 and may not exceed 31
      uint64 bla=atoi(optarg);
      if(bla>31) bla=31;
      basesperhash=bla;
      break;
    }
    case 't': {
      MB_totype.push_back(optarg);
      break;
    }
    case 'o': {
      fqqualoffset=atoi(optarg);
      break;
    }
    case 'd': {
      MB_deletestaronlycolumns=true;
      break;
    }
    case 'i': {
      MB_inversehit=true;
      break;
    }
    case 'r': {
      MB_fwdandrev=false;
      break;
    }
    case 'h': 
    case '?': {
      usage();
      exit(0);
    }
    default : {}
    }
  }

  // exactly three non-option arguments must be left over
  if(argc-optind < 1) {
    cerr << argv[0] << ": " << "Missing baitfile, infile and out-basename as arguments!\n";
    usage();
    exit(1);
  }

  if(argc-optind < 3) {
    cerr << argv[0] << ": " << "Missing one of baitfile, infile or out-basename as argument!\n";
    usage();
    exit(1);
  }

  if(argc-optind > 3) {
    cerr << argv[0] << ": " << "Whoops, found more than baitfile, infile and out-basename as arguments left on the command line!\n";
    cerr << "Unparsed command line: ";
    for(;optind<argc;optind++) cerr <<argv[optind] << " ";  
    cerr << endl;
    usage();
    exit(1);
  }

  MB_baitfile=argv[optind++];
  MB_infile=argv[optind++];
  MB_outbasename=argv[optind];

  if(MB_baitfile=="--help"){
    usage();
    exit(0);
  }

  checkTypes(MB_fromtype,MB_totype);

  MIRAParameters::setupStdMIRAParameters(MB_Pv);
  if(!miraparams.empty()){
    cout << "Parsing special MIRA parameters: " << miraparams << endl;
    MIRAParameters::parse(miraparams.c_str(),MB_Pv);
    cout << "Ok.\n";
  }

  for(uint32 i=0; i< MB_Pv.size(); i++){
    MB_Pv[i].setAssemblyFASTQQualOffset(fqqualoffset);
  }

  // the bait read pool is only needed until the hash statistics are built
  {
    ReadPool baitrp(&MB_Pv);
    cout << "Loading baits ...";
    uint32 dummy=0;
    baitrp.loadDataFromFASTA(MB_baitfile,1, dummy, false,"",false);

    cout << "baitrp.size(): " << baitrp.size() << endl;

    MB_skim.provideHashStatistics(".",baitrp,false,false,true,MB_fwdandrev,1,basesperhash,1);
  }

  ReadPool loadrp(&MB_Pv);

  // open one output stream per output type, in the order of MB_totype
  //  (saveReadPool() relies on both lists being parallel)
  cout << "Loading from " << MB_fromtype << ", saving to:";
  for(list<string>::iterator ttI= MB_totype.begin(); ttI!=MB_totype.end(); ++ttI){
    cout << ' ' << *ttI;

    string outname(MB_outbasename);
    if(*ttI=="fasta"){
      outname+=".fasta";
    } else if(*ttI=="fastq"){
      outname+=".fastq";
    } else if(*ttI=="caf" || *ttI=="scaf" ){
      outname+=".caf";
    } else if(*ttI=="maf"){
      outname+=".maf";
    } else {
      cout.flush();
      cerr << "\n\n-t " << *ttI << " is not a valid type\n";
      //usage();
      exit(1);
    }

    ofstream * ofstmp=new ofstream;
    MB_ofs.push_back(ofstmp);
    ofstmp->open(outname.c_str(), ios::out);
    // stop right here instead of silently losing all output later on
    if(!ofstmp->is_open()){
      cout.flush();
      cerr << "\n\nCould not open file '" << outname << "' for writing.\n";
      exit(1);
    }
  }
  cout << '\n';

  try{
    if(MB_fromtype=="caf") {
      CAF tcaf(loadrp, MB_clist, &MB_Pv);
      vector<uint32> dummy;
      tcaf.load(MB_infile.c_str(), 
                Read::SEQTYPE_SANGER,
                1,
                dummy,
                false, 
                cafmafload_callback
        );
    }else if(MB_fromtype=="maf") {
      MAFParse mafp(loadrp, MB_clist, &MB_Pv);
      vector<uint32> dummy;
      mafp.load(MB_infile.c_str(), 
                Read::SEQTYPE_SANGER,
                1,
                dummy,
                false, 
                cafmafload_callback
        );
    }else{
      uint32 dummy=0;
      if(MB_fromtype=="fasta"){
        cout << "Loading data from FASTA ...";
        loadrp.loadDataFromFASTA(MB_infile,2, dummy, false, MB_infile+".qual",false,Read::SEQTYPE_SANGER,false,readpoolload_callback);
      } else if(MB_fromtype=="fastq"){
        cout << "Loading data from FASTQ ...";
        loadrp.loadDataFromFASTQ(MB_infile,2, dummy,false,Read::SEQTYPE_SANGER,false,readpoolload_callback);
      } else if(MB_fromtype=="gbf") {
        cout << "Loading data from GBF ...";
        // no callback for GBF: load everything, then filter and save once
        loadrp.loadDataFromGBF(MB_infile);
        saveReadPool(loadrp);
      } else {
        cerr << "\n\n-f " << MB_fromtype << " is not a valid type!\n";
        //usage();
        exit(1);
      }
      cout << " done.\n"; 
    }
  }
  catch(Notify & n){
    // Need to close by hand as handleError() will perform a hard exit
    ConvPro::closeOpenStreams(MB_ofs);
    n.handleError("main");
  }
  catch(Flow &){
    // the output is incomplete at this point: do not report success
    cerr << "Unexpected exception: Flow()\n";
    ConvPro::closeOpenStreams(MB_ofs);
    exit(100);
  }
  catch(...){
    cerr << "Unknown exception caught, aborting the process.\n\nPlease contact: bach@chevreux.org\n\n";
    abort();
  }

  cout << "\nBaiting process finished.\n";

  FUNCEND();
  return 0;
}



int main(int argc, char ** argv)
{
  //CALLGRIND_STOP_INSTRUMENTATION;

  FUNCSTART("int main(int argc, char ** argv)");

  string path;
  string convertprog;
  splitFullPathAndFileName(argv[0],path,convertprog);

  std::transform(convertprog.begin(),
		 convertprog.end(),
		 convertprog.begin(), 
		 (int(*)(int))std::tolower); // now, that's what I call ugly
  
  try {
    if(convertprog=="tagsnp"){
      tagsnp t;
      t.main(argc, argv);
    }else if(convertprog=="mirafa"){
      mirafa f;
      f.main(argc, argv);
    }else if(convertprog=="mirabait"){
      MiraBait m;
      m.main(argc, argv);
    }else{
      ConvPro cp;
      cp.main2(argc, argv);
    }
  }
  catch(Notify n){
    n.handleError("main");
  }
  catch(Flow f){
    cout << "INTERNAL ERROR: Unexpected exception: Flow()\n";
    exit(100);
  }
  catch(const std::bad_alloc & e){
    cout << "Out of memory detected, exception message is: ";
    cout << e.what() << endl; 

    if(sizeof(size_t) == sizeof(int32)){
      cout << "\nYou are running a 32 bit executable. Please note that the maximum"
	"\ntheoretical memory a 32 bit programm can use (be it in Linux, Windows or"
	"\nother) is 4 GiB, in practice less: between 2.7 and 3.3 GiB. This is valid"
	"\neven if your machine has hundreds of GiB."
	"\nShould your machine have more that 4 GiB, use a 64 bit OS and a 64 bit"
	"\nversion of MIRA.";
    }

    cout << "\n\nIf you have questions on why this happened, please send the last 1000"
      "\nlines of the output log (or better: the complete file) to the author"
      "\ntogether with a short summary of your assembly project.\n\n";

    exit(100);
  }
  catch(const ios_base::failure & e){
    cout << "Failure in IO stream detected, exception message is: "
	 << e.what() << endl
	 << "\nWe perhaps ran out of disk space or hit a disk quota?\n";
    exit(100);
  }
  catch (exception& e)
  {
    cout << "A 'standard' exception occured (that's NOT normal):\n" << e.what() << "\n\nIf the cause is not immediatly obvious, please contact: bach@chevreux.org\n\n";
    exit(100);
  }
  catch(...){
    cout << "Unknown exception caught, aborting the process.\n\nPlease contact: bach@chevreux.org\n\n";
    exit(100);
  }

  Read::dumpStringContainerStats(cout);
  return 0;
}
