#!/usr/bin/env python
#
#    Copyright 2020 Ming-Feng Hsieh (jmfhsieh@gmail.com)
#
#    This file is part of Clover.
#
#    Clover is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    Clover is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with Clover; if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
#
import math, random, sys, time,copy,pickle,os,kpgraph

# ---------------------------------------------------------------------------
# Module-level configuration. Most of these names are overwritten by
# setglobals() from the command line (see its 'parameter' and 'option' tables).
# ---------------------------------------------------------------------------
# Input read files (-i1 / -i2): a file name or a list of file names.
infile1=None
infile2=None
# Default upper bound on the number of reads (or read pairs) the readers load.
i_number=1000000000000000
# Default number of trailing characters the readers cut from every read.
t_number=0
# -cs: sufficient support for contig linking by a shorter k-mer.
c_number=5
# -ss: sufficient support for scaffolding.
s_number=5
p_number=4
# -is: insert size(s) of the libraries; None when not given on the command line.
insertsize=None
insertstd=None
insertstdfraction=0.2
# -k: length of k-mer.
k_mer=40
# -p: edit distance when clustering k-mers.
p_mer=1
i_mer=None
#m_mer= k_mer/(p_mer+1)
# -m: minimum length of k-mer for contig linking by a shorter k-mer.
m_mer= None
metric = 'alignment'
removetransitivemethod='removetransitiveandconflict'
reference=None
# Copy of the command line; the program name in arg[0] is shortened and then
# dropped, so 'arg' ends up holding only the user-supplied arguments.
arg=list(sys.argv[:])
arg[0]=arg[0].split('.')[0]
arg[0]=arg[0].split('/')[-1]
#outname='output '+' '.join(arg)
# -o: prefix of the output file.
outname='out'
arg=arg[1:]
#rdna: complement of each DNA base (used to build reverse complements)
rdna={'A':'T', 'C':'G', 'G':'C', 'T':'A', 'N':'N' }
#itd: integer to DNA
#dti: DNA to integer
itd=('A','C','G','T')
dti={'A':0, 'C':1, 'G':2, 'T':3 }
#cstd: color space digit to DNA letter
cstd={'0':'A', '1':'C', '2':'G', '3':'T'}
# -sp: split coefficient used when a node holds several major consensus sequences.
splitparameter=0.3
# -hp: homogeneous coefficient.
# NOTE(review): the help text advertises a default of 0.8 but the value here is
# negative - confirm whether the sign is intentional.
homogeneousparameter=-0.8
# -ml: minimum length of contig before outputting.
minlen=200
# Flag options (-t, -pr, -pm, -le); setglobals() sets them to 1 when present.
tips=None
prerep=None
premate=None
lateestimate=None
repeattype=0
repeatsort='w'
repeatorder='i'
# -f flag; set to 1 by setglobals() when present.
frag=0
rlparameter=0.0
rwparameter=0.0
bubbleidentity=0.90
# Table sliced by kmer.getweight().
kmerweightlist =[1]*1000
# -rp: repeating coefficient.
repeatparameter=0.0
#ununiformparameter=0.2
#totallength=0
idealfraction1= 0.8
idealfraction2=0.65
idealfraction3=0.5
idealnumber=5
idealaccuracy =0.96
# -ie: background probability of a sequencing error to a certain nucleic acid.
idealerrorfraction = (1.0 - idealaccuracy)/ 3.0
#kpiit_mer=[k_mer,p_mer,i_mer,i_number,t_number]
#for i in range(len(arg)):
# kpiit_mer[i]=int(arg[i])
#(k_mer,p_mer,i_mer,i_number,t_number)=kpiit_mer

def getkmer(s,k=None):
 """Return every length-k substring of s, ordered by start position.

 s -- the sequence to cut up.
 k -- substring length; when omitted the module-level k_mer is used, which
      is what every existing single-argument call gets.

 The result is an empty list when s is shorter than k.
 """
 if k is None:
  k=k_mer
 return [s[i:i+k] for i in range(len(s)+1-k)]

def getnode(contigpool,nodepool):
 """Store the k-mers of every contig into slot 1 of consecutive nodepool entries.

 The k-mers of the contigs are written in order, one per nodepool entry,
 starting at nodepool[0]. The same nodepool object is returned.
 """
 slot=0
 for contig in contigpool:
  for piece in getkmer(contig.sequence):
   nodepool[slot][1]=piece
   slot+=1
 return nodepool

def getset(nodepool,w):
 """Return element 1 of every nodepool entry whose element 0 is at least w."""
 selected=[]
 for entry in nodepool:
  if entry[0]>=w:
   selected.append(entry[1])
 return selected

def switch():
 """Exchange sys.stdout with the stream kept in the module global ss."""
 global ss
 sys.stdout,ss=ss,sys.stdout

def isnum(ss):
 """Return True when every character of ss is a decimal digit.

 The empty string yields True because there is no offending character.
 """
 digits=('0','1','2','3','4','5','6','7','8','9')
 for ch in ss:
  if ch not in digits:
   return False
 return True

def isint(ss):
 """Return True when ss is an optional leading '-' followed by decimal digits.

 The empty string is rejected (it used to raise IndexError on ss[0]).
 A bare '-' is still accepted, exactly as before, because isfloat() hands
 the part in front of the decimal point to this function and relies on that
 to recognise strings such as '-.5'.
 """
 if not ss:
  return False
 a=('0','1','2','3','4','5','6','7','8','9')
 if ss[0] == '-':
  ss=ss[1:]
 return all([s in a for s in ss ])

def isfloat(ss):
 """Return True when ss looks like [-]digits.digits with at least one digit.

 Exactly one '.' is required. Either side of the point may be empty
 ('1.' and '-.5' are accepted) but not both: '.' and '-.' are rejected,
 since float() cannot convert them. A missing integer part no longer
 raises IndexError.
 """
 a= ss.split('.')
 if len(a) != 2:
  return False
 whole,frac=a
 if whole[:1] == '-':
  whole=whole[1:]
 if not (whole or frac):
  return False
 digits=('0','1','2','3','4','5','6','7','8','9')
 return all([c in digits for c in whole]) and all([c in digits for c in frac])

def getvalue(aa):
 """Convert a command-line token to int, float or str, in that order of preference.

 A token is converted only when isint()/isfloat() accept it. The conversions
 are additionally guarded, so a token that the predicates accept but
 int()/float() reject (for instance a bare '-') is returned unchanged as a
 string instead of raising ValueError. An empty token is returned as ''.
 """
 if not aa:
  return str(aa)
 if isint(aa):
  try:
   return int(aa)
  except ValueError:
   pass
 if isfloat(aa):
  try:
   return float(aa)
  except ValueError:
   pass
 return str(aa)

def getvalues(aa):
 """Convert a token, or a comma separated list of tokens, with getvalue().

 A token containing ',' becomes a list with one converted value per
 element; any other token becomes a single converted value.
 """
 if ',' not in aa:
  return getvalue(aa)
 return [getvalue(piece) for piece in aa.split(',')]

def fileinformation():
 print ''
 print 'Clover is the command line tool. The user can run Clover straightforward with '
 print 'following parameters:'
 print ''
 print '  $ clover -k <Length of k-mer> [options] -i1 <Input file1> [-i2 <Input file2>]'
 print ''
 print ''
 print 'If only a read file is used without mate pair, the parameter -i2 can be omitted.'
 print 'For example, if one library of single read file frag.fastq is used:'
 print '  $ clover -k 40 -i1 frag.fastq'
 print ''
 print 'If paired read files are used, file name of -i2 must correspond to -i1.'
 print ''
 print 'For example, if one library of paired read files frag1.fq and frag2.fq is used:'
 print '  $ clover -k 40 -i1 frag1.fq -i2 frag2.fq'
 print ''
 print 'If two libraries of paired read files frag1.fq, frag2.fq, short1.fq and short2.fq '
 print 'are used, where assume that frag1.fq corresponds to frag2.fq and short1.fq '
 print 'corresponds to short2.fq:'
 print '  $ clover -k 40 -i1 frag1.fq,short1.fq -i2 frag2.fq,short2.fq'
 print ''
 print 'The file formats accepted by Clover are \'fasta\' and \'fastq\', which can be '
 print 'distinguished by their filename extensions (.fa, .fasta, .fq, .fastq, .fatq).'
 print ''
 print 'For more information, please type:'
 print '  $ clover -h'

def helpinformation():
 print 'Clover is the command line tool. The user can run Clover straightforward with '
 print 'following parameters:'
 print ''
 print '  $ clover -k <Length of k-mer> [options] -i1 <Input file1> [-i2 <Input file2>]'
 print ''
 print '1. A simple Clover example:'
 print ''
 print 'To assemble a paired read files, type:'
 print '  $ clover -k 40 -i1 frag1.fq -i2 frag2.fq'
 print ''
 print 'It produces the following files:'
 print 'out_contig.fasta - contig prediction using reads information.'
 print 'out_scaffold.fasta - super contig prediction after scaffolding on mate-pair information.'
 print 'systemfile... - intermediate files that could be removed after execution.'
 print 'If the intermediate files are reserved, Clover could run faster from the intermediate result'
 print ' when rerun on the same inputs and -k and -p.'
 print ''
 print 'First of all, we must define a parameter type, list:'
 print 'A list is a series of parameters more than two, which separated by \',\'.'
 print 'For example, a list of 3 integers: 3,5,7 and a list of two filenames: frag1.fastq,short1.fastq.'
 print ''
 print '  -k    [integer] (default 40)'
 print '        Length of k-mer'
 print '  -i1   [filename or list of filenames]'
 print '        Input file1'
 print '  -i2   [filename or list of filenames]'
 print '        Input file2'
 print ''
 print 'If paired read files are used, file name of -i2 must correspond to -i1.'
 print ''
 print 'For example, if two libraries of paired read files frag1.fq, frag2.fq, short1.fq and short2.fq '
 print 'are used, where assume that frag1.fq corresponds to frag2.fq and short1.fq '
 print 'corresponds to short2.fq:'
 print '  $ clover -k 40 -i1 frag1.fq,short1.fq -i2 frag2.fq,short2.fq'
 print ''
 print 'If only a read file is used without mate pair, the parameter -i2 can be omitted.'
 print 'For example, if one library of single read file frag.fastq is used:'
 print '  $ clover -k 40 -i1 frag.fastq'
 print ''
 print 'The file formats accepted by Clover are \'fasta\' and \'fastq\', which can be '
 print 'distinguished by their filename extensions (.fa, .fasta, .fq, .fastq, .fatq).'
 print ''
 print '2. Important and frequently used options:'
 print ''
 print '  -p    [integer] (default 1 and constrain to p < k)'
 print '        Edit distance when clustering k-mers'
 print '  -o    [filename] (default out)'
 print '        the prefix of the Output file'
 print '  -is   [integer or list of integers]'
 print '        Insert sizes of fragment libraries, and the order must '
 print '        correspond to the input files.'
 print '        For example, if two libraries of paired read files with insert '
 print '        size 180 and 3500 are used: frag1.fq, frag2.fq, short1.fq and '
 print '        short2.fq.'
 print '          $ clover -k 40 -p 1 -is 180,3500 -i1 frag1.fq,short1.fq -i2 '
 print '            frag2.fq,short2.fq'
 print '        If we omit -is, Clover would automatic estimate the insert size.'
 print '        For example, if two libraries of paired read files with unknown '
 print '        insert size are used: frag1.fq, frag2.fq, short1.fq and '
 print '        short2.fq.'
 print '          $ clover -k 40 -p 1 -i1 frag1.fq,short1.fq -i2 '
 print '            frag2.fq,short2.fq'
 print '  -ss   [integer or list of integers] (default 5)'
 print '        Sufficient support for scaffolding If there are multiple '
 print '        libraries, set it to a list of numbers and the order must '
 print '        correspond to the input files.'
 print '        If there are multiple libraries with setting of an integer,   '
 print '        Clover may apply the same integer on all libraries.'
 print '  -cs   [integer] (default 5)'
 print '        Sufficient support for contig linking by a shorter k-mer '
 print ''
 print '3. Advanced options:'
 print ''
 print '  -m    [integer] (default (k/2)+1 and constrain to m < k)'
 print '        Minimum length of k-mer for contig linking by a shorter k-mer'
 print '  -ml   [integer] (default 200)'
 print '        Minimum length of contig before outputting'
 print '  -sp   [fraction] (default 0.3 and constrain to 0<= sp <= 1)'
 print '        Split coefficient to split node when containing several major consensus sequences'
 print '  -hp   [fraction] (default 0.8 and constrain to 0<= hp <= 1)'
 print '        Homogeneous coefficient of distribution of input reads '
 print '        If consider the input reads are pretty homogeneous, may set it '
 print '        to 1.0, if consider input reads are pretty heterogeneous, may '
 print '        set it to 0.6 or less.'
 print '  -rp   [fraction] (default 0.0 and constrain to 0<= rp <= 1)'
 print '        Repeating coefficient '
 print '        If set it to 0.0, Clover would not execute the process to '
 print '        resolve repeat. '
 print '        If set it greater than 0, Clover would resolve repeats acording '
 print '        to the condition given by rp and we usually set it to 0.8 if '
 print '        needed. '
 print '        Like hp, rp relates to the homogeneous situation in the repeat '
 print '        region. A higher rp gives a tighter condition to resolve repeat, '
 print '        a lower rp gives a looser condition to resolve repeat that would '
 print '        produce more errors.'
 print '  -ie   [fraction] (default 0.01333333 and constrain to 0<= ie <= 1)'
 print '        Background probability of sequencing error to a certain nucleic '
 print '        acid.'
 print '        If consider the input reads are very accurate, may set it to  '
 print '        0.0.'
 print ''
 print '4. Flag arguments:'
 print ''
 print '  -t    [ ]'
 print '        Executes pruning of tips and erroneous connections.'
 print '  -f    [ ]'
 print '        Gives an earlier execution of scaffolding on fragment read set '
 print '        before cleaning of contig with length less than ml.'
 print '  -pm   [ ]'
 print '        Gives an earlier execution of contig linking by a shorter k-mer '
 print '        before trim low-frequency edges.'
 print '  -pr   [ ]'
 print '        Gives an earlier execution of resolving repeats if rp greater '
 print '        than 0.0 before cleaning of contig with length less than ml.'
 print ''
 print 'For example, to use the -f flag:'
 print '  $ clover -k 40 -p 1 -i1 frag1.fq,short1.fq -i2 frag2.fq,short2.fq -ss 5 -cs 7 -f'
 print ''
 print ''
 print 'SEE ALSO'
 print ''
 print 'For more information, please browse \'Test Case\' of our Website.'
 print ''
 print ''
 print 'CONTACT INFORMATION'
 print ''
 print 'We would like to hear your comments and suggestions. Please browse our Website or Email to us.'
 print '  jmfhsieh@gmail.com'
 print ''
 print ''
 print ''

def setglobals(aa):
 gv=globals()
 bb=aa[:]
 parameter={'-k':'k_mer','-p':'p_mer','-m':'m_mer','-i1':'infile1','-i2':'infile2','-o':'outname','-is':'insertsize','-ss':'s_number','-cs':'c_number','-ml':'minlen','-sp':'splitparameter','-hp':'homogeneousparameter','-rp':'repeatparameter','-ie':'idealerrorfraction'}
 option={'-t':'tips','-f':'frag','-pm':'premate','-pr':'prerep','-le':'lateestimate'}
# for i in range(0,len(aa),2):
#  gv[aa[i]]=getvalues(aa[i+1])
 while len(bb):
  if bb[0] in parameter:
   if bb[0] == '-o':
    gv[parameter[bb.pop(0)]]=str(bb.pop(1))
   else:
    gv[parameter[bb.pop(0)]]=getvalues(bb.pop(1))
  elif bb[0] in option:
   gv[option[bb.pop(0)]]=1
  elif bb[0] in ['-h','-H']:
   helpinformation()
   sys.exit(1)
  else:
   bb.pop(0)
 if infile1==None:
  print 'Please set the input file.'
  fileinformation()
  sys.exit(1)
 if isinstance(infile1,list):
  for cc in infile1:
   if not os.path.isfile(cc):
    print cc,'not found.'
    fileinformation()
    sys.exit(1)
 else:
  if not os.path.isfile(infile1):
   print infile1,'not found.'
   fileinformation()
   sys.exit(1)
 if infile2 !=None:
  if isinstance(infile2,list):
   if isinstance(infile1,list):
    if len(infile1) != len(infile2):
     print 'Files not match.'
     fileinformation()
     sys.exit(1)
   for cc in infile2:
    if not os.path.isfile(cc):
     print cc,'not found.'
     fileinformation()
     sys.exit(1)
  else:
   if isinstance(infile1,list):
    print 'Files not match.'
    fileinformation()
    sys.exit(1)
   if not os.path.isfile(infile2):
    print infile2,'not found.'
    fileinformation()
    sys.exit(1)

def setconfig(arg):
 """Expand a 'config <file>' pair in the argument list with the file's settings.

 When the token 'config' occurs in arg, it and the following token (the
 file name) are removed and replaced, in place, by the tokens read from
 that file. arg is modified and also returned. Without a 'config' token
 arg is returned untouched.

 File format: one 'key=value' setting per line. Spaces are removed, blank
 lines and lines starting with '#' are skipped. The line is split at the
 first '=' only, so a value may itself contain '='. A line without '='
 contributes its single token (useful for flag options). Both LF and CRLF
 line endings are accepted.

 The file is opened read-only. Exits with a message when 'config' is the
 last argument.
 """
 if 'config' not in arg:
  return arg
 ai= arg.index('config')
 arg.pop(ai)
 if ai >= len(arg):
  sys.exit("'config' must be followed by the name of a configuration file.")
 confname=arg.pop(ai)
 with open(confname,'r') as conf:
  text=conf.read()
 tokens=[]
 for line in text.split('\n'):
  line=line.replace(' ','').rstrip('\r')
  if not line or line[0] == '#':
   continue
  key,sep,value=line.partition('=')
  tokens.append(key)
  if sep:
   tokens.append(value)
 arg[ai:ai] = tokens
 return arg

# Expand an optional 'config <file>' pair in the argument list, then copy the
# arguments into the module globals (setglobals exits on invalid input).
arg=setconfig(arg)
setglobals(arg)
#print outname
#ff = open('systemfilec6'+'-'+str(k_mer)+'-'+str(p_mer)+'-'+str(splitparameter)+'-'+str(idealerrorfraction)+'-'+fixstring(infile1)+'-'+fixstring(infile2)+'-'+str(i_number)+'-'+str(t_number),'w+')

def fixstring(a):
 """Strip the directory part from a path, or from each path of a list.

 For a list the base names are joined with '+'. Only '/' is treated as
 a separator.
 """
 if not isinstance(a,list):
  return str(a.split('/')[-1])
 return str('+'.join([item.split('/')[-1] for item in a]))

# Derive the output directory from the directory part of the (first) input
# file: everything up to and including the last '/', or '' when there is none.
outdir=''
if isinstance(infile1,list):
 outdir=infile1[0][:]
else:
 outdir=infile1[:]
outdir=outdir.split('/')
outdir.pop()
if len(outdir):
 outdir='/'.join(outdir)
 outdir=outdir+'/'
else:
 outdir='/'.join(outdir)
# Uniquely named system file in that directory (timestamp plus random number).
ffname=outdir+'systemfilec6'+'-'+str(k_mer)+'-'+str(p_mer)+'-'+str(time.time())+str(random.random())
ff = open(ffname,'w+')
print 'start:'
# From here on print output goes to the system file; the real stdout is kept
# in ss so that switch() can swap the two streams.
ss=sys.stdout
sys.stdout=ff

print 'k_mer :',k_mer
print 'p_mer :',p_mer
print 'i_mer :',i_mer
print 'm_mer :',m_mer
print 'i_number :',i_number
print 't_number :',t_number
print 'p_number :',p_number
print 'splitparameter :',splitparameter
# Per-base count table with one column per k-mer position; node.findsplit()
# resets and reuses it as scratch space.
dlist={'A':[0]*k_mer, 'C':[0]*k_mer, 'G':[0]*k_mer, 'T':[0]*k_mer}

def readfawfile( infile,i_n=i_number):
 file=open(infile, 'r' )
 readlist=[]
 i=0
 r=file.readline()
 back=1
 if r[len(r)-2]== '\r':
  back=2
#bc= back constrain
 bc=back+2
 while len(r) >=bc and i <i_n:
  weight=int(r[:-back].split(' ')[1])
  s=file.readline()
  s=s[:-back]
  if 'N' not in s:
   readlist.append((s,weight))
   i+=1
  r=file.readline()
 if i < i_n:
  print str(infile)+' exhausted! only '+str(i)+' reads are read.'
 file.close()
 return readlist

def readfafile( infile,i_n=i_number,t_n=t_number):
 file=open(infile, 'r' )
 readlist=[]
 i=0
 r=file.readline()
 back=1
 if r[len(r)-2]== '\r':
  back=2
#bc= back constrain
 bc=back+2
 back=back+t_n
 while len(r) >=bc and i <i_n:
  s=file.readline()
  s=s[:-back]
  if 'N' not in s and len(s) > k_mer:
   readlist.append(s)
   i+=1
  r=file.readline()
 if i < i_n:
  print str(infile)+' exhausted! only '+str(i)+' reads are read.'
 file.close()
 return readlist

def readfapair( infile1,infile2,i_n=i_number,t_1=t_number,t_2=None):
 file1=open(infile1, 'r' )
 file2=open(infile2, 'r' )
 readlist1=[]
# readlist2=[]
 i=0
 r1=file1.readline()
 r2=file2.readline()
 if t_2 is None:
  t_2=t_1
#  if len(r1)<=len(r2):
#   t_2 =t_1+ len(r2) -len(r1)
#  else:
#   t_2=t_1
#   t_1 =t_2+ len(r1) -len(r2)
 back1=1
 if r1[len(r1)-2]== '\r':
  back1=2
#bc= back constrain
 bc1=back1+2
 back1=back1+t_1
 back2=1
 if r2[len(r2)-2]== '\r':
  back2=2
#bc= back constrain
 bc2=back2+2
 back2=back2+t_2
 while len(r1) >=bc1 and len(r2) >=bc2 and i <i_n:
  s1=file1.readline()
  s1=s1[:-back1]
  s2=file2.readline()
  s2=s2[:-back2]
  if 'N' not in s1 and 'N' not in s2 and len(s1) > k_mer and len(s2) > k_mer:
   readlist1.append(s1)
#   readlist2.append(s2)
   readlist1.append(s2)
   i+=1
  r1=file1.readline()
  r2=file2.readline()
 if i < i_n:
  print str(infile1)+' and '+str(infile2)+' exhausted! only '+str(i)+' pair reads are read.'
 file1.close()
 file2.close()
# return [readlist1,readlist2]
 return readlist1

def readfqfile( infile,i_n=i_number,t_n=t_number):
 file=open(infile, 'r' )
 readlist=[]
 i=0
 r=file.readline()
 back=1
 if r[len(r)-2]== '\r':
  back=2
#bc= back constrain
 bc=back+2
 back=back+t_n
 while len(r) >=bc and i <i_n:
  s=file.readline()
  s=s[:-back]
  if 'N' not in s and len(s) > k_mer:
   readlist.append(s)
   i+=1
  r=file.readline()
  r=file.readline()
  r=file.readline()
 if i < i_n:
  print str(infile)+' exhausted! only '+str(i)+' reads are read.'
 file.close()
 return readlist

def readfqpair( infile1,infile2,i_n=i_number,t_1=t_number,t_2=None):
 file1=open(infile1, 'r' )
 file2=open(infile2, 'r' )
 readlist1=[]
# readlist2=[]
 i=0
 r1=file1.readline()
 r2=file2.readline()
 if t_2 is None:
  t_2=t_1
#  if  len(r1)<=len(r2):
#   t_2 =t_1+ len(r2) -len(r1)
#  else:
#   t_2=t_1
#   t_1 =t_2+ len(r1) -len(r2)
 back1=1
 if r1[len(r1)-2]== '\r':
  back1=2
#bc= back constrain
 bc1=back1+2
 back1=back1+t_1
 back2=1
 if r2[len(r2)-2]== '\r':
  back2=2
#bc= back constrain
 bc2=back2+2
 back2=back2+t_2
 while len(r1) >=bc1 and len(r2) >=bc2 and i <i_n:
  s1=file1.readline()
  s1=s1[:-back1]
  s2=file2.readline()
  s2=s2[:-back2]
  if 'N' not in s1 and 'N' not in s2 and len(s1) > k_mer and len(s2) > k_mer:
   readlist1.append(s1)
#   readlist2.append(s2)
   readlist1.append(s2)
   i+=1
  r1=file1.readline()
  r1=file1.readline()
  r1=file1.readline()
  r2=file2.readline()
  r2=file2.readline()
  r2=file2.readline()
 if i < i_n:
  print str(infile1)+' and '+str(infile2)+' exhausted! only '+str(i)+' pair reads are read.'
 file1.close()
 file2.close()
# return [readlist1,readlist2]
 return readlist1

def readcsfafile( infile,i_n=i_number,t_n=t_number):
 """Read color-space reads with readfullcsfafile() and map them to letters.

 The first two characters of every read are dropped and each remaining
 character is translated through the cstd table.

 Only the reads actually returned are converted. The loop used to run over
 range(i_n), which fails with the default limit and whenever the file
 holds fewer than i_n reads.
 """
 readlist= readfullcsfafile( infile,i_n,t_n)
 return [''.join([cstd[c] for c in r[2:]]) for r in readlist]

def readfullcsfafile( infile,i_n=i_number,t_n=t_number):
 file=open(infile, 'r' )
 readlist=[]
 i=0
 r=file.readline()
 r=file.readline()
 r=file.readline()
 r=file.readline()
 back=1
 if r[len(r)-2]== '\r':
  back=2
#bc= back constrain
 bc=back+2
 back=back+t_n
 while len(r) >=bc and i <i_n:
  s=file.readline()
  s=s[:-back]
  if '.' not in s[1:]:
   readlist.append(s)
   i+=1
  r=file.readline()
 if i < i_n:
  print str(infile)+' exhausted! only '+str(i)+' reads are read.'
 file.close()
 return readlist

def readcsfapair( infile1, infile2,i_n=i_number,t_1=t_number,t_2 =None):
 """Read paired color-space reads with readfullcsfapair() and map them to letters.

 The first two characters of every read are dropped and each remaining
 character is translated through the cstd table. The flat, pair-adjacent
 order of the reads is kept.

 Only the reads actually returned are converted. The loop used to run over
 range(2*i_n), which fails with the default limit and whenever fewer than
 i_n pairs were found.
 """
 readlist1= readfullcsfapair( infile1,infile2,i_n,t_1,t_2)
 return [''.join([cstd[c] for c in r[2:]]) for r in readlist1]

def readfullcsfapair( infile1,infile2,i_n=i_number,t_1=t_number,t_2 =None):
 # Read up to i_n pairs of raw color-space reads from two files whose records
 # are matched by the numeric identifier in their header lines.
 #
 # The first three lines of each file are skipped; the fourth is taken as the
 # first header. A header is parsed by dropping its first character, splitting
 # on '_' and converting the first three fields to int. The two files are then
 # walked like a sorted merge: the side with the smaller identifier advances,
 # and when both identifiers are equal the two reads form a pair.
 #
 # A pair is kept only when neither read contains '.' after its first
 # character. Returns one flat list with the two reads of a pair adjacent.
 #
 # NOTE(review): an empty or very short file makes r1[len(r1)-2] raise
 # IndexError.
 file1=open(infile1, 'r' )
 file2=open(infile2, 'r' )
 readlist1=[]
# readlist2=[]
 i=0
 # skip three lines per file; r1/r2 end up holding the first header
 r1=file1.readline()
 r1=file1.readline()
 r1=file1.readline()
 r1=file1.readline()
 r2=file2.readline()
 r2=file2.readline()
 r2=file2.readline()
 r2=file2.readline()
 # Without an explicit t_2 the trim amounts are offset by the difference in
 # length of the two first header lines.
 # NOTE(review): the fasta/fastq pair readers simply use t_2=t_1 here -
 # confirm that deriving the trim from header lengths is intended.
 if t_2 is None:
  if  len(r1)<=len(r2):
   t_2 =t_1+ len(r2) -len(r1)
  else:
   t_2=t_1
   t_1 =t_2+ len(r1) -len(r2)
 # back1/back2: characters cut from the end of each read line (line
 # terminator plus trim); the terminator length is detected on the first header
 back1=1
 if r1[len(r1)-2]== '\r':
  back1=2
#bc= back constrain
 bc1=back1+2
 back1=back1+t_1
 back2=1
 if r2[len(r2)-2]== '\r':
  back2=2
#bc= back constrain
 bc2=back2+2
 back2=back2+t_2
 # r1n/r2n: True while r1/r2 still hold a raw header line that must be parsed
 r1n,r2n=True,True
 # NOTE(review): once a header has been parsed r1/r2 are 3-tuples, so len() is
 # 3; with CRLF input bc is 4 and the loop would stop as soon as one side
 # keeps its parsed header across an iteration - confirm.
 while len(r1) >=bc1 and len(r2) >=bc2 and i <i_n:
  if r1n:
   r1n=False
   r1=r1[1:]
   r1=r1.split('_')
   r1=(int(r1[0]),int(r1[1]),int(r1[2]))
  if r2n:
   r2n=False
   r2=r2[1:]
   r2=r2.split('_')
   r2=(int(r2[0]),int(r2[1]),int(r2[2]))
  if r1<r2:
   # file 1 is behind: drop its read line and load its next header
   r1=file1.readline()
   r1=file1.readline()
   r1n=True
  elif r1>r2:
   # file 2 is behind: drop its read line and load its next header
   r2=file2.readline()
   r2=file2.readline()
   r2n=True
  else:
   # identifiers match: the two read lines form a pair
   s1=file1.readline()
   s1=s1[:-back1]
   s2=file2.readline()
   s2=s2[:-back2]
   if '.' not in s1[1:] and '.' not in s2[1:]:
    readlist1.append(s1)
#    readlist2.append(s2)
    readlist1.append(s2)
    i+=1
   r1=file1.readline()
   r1n=True
   r2=file2.readline()
   r2n=True
 if i < i_n:
  print str(infile1)+' and '+str(infile2)+' exhausted! only '+str(i)+' pair reads are read.'
 file1.close()
 file2.close()
# return [readlist1,readlist2]
 return readlist1

def readfastafile( infile):
 """Return the sequences of a multi-line fasta file, one string per record.

 A record starts at a line beginning with '>' and its sequence is the
 concatenation of the following lines up to the next header, the first
 blank line or the end of the file. Lines outside a record (before the
 first header, or between a blank line and the next header) are ignored.
 A header without sequence lines yields ''.

 Line terminators are stripped per line, so an empty file yields [] instead
 of raising IndexError and a final line without a newline keeps its last
 character. The file is closed even when reading fails.
 """
 readlist=[]
 handle=open(infile, 'r' )
 try:
  parts=None   # sequence lines of the record being read; None outside a record
  for line in handle:
   line=line.rstrip('\r\n')
   if line[:1] == '>':
    if parts is not None:
     readlist.append(''.join(parts))
    parts=[]
   elif parts is not None:
    if line:
     parts.append(line)
    else:
     # a blank line closes the current record
     readlist.append(''.join(parts))
     parts=None
  if parts is not None:
   readlist.append(''.join(parts))
 finally:
  handle.close()
 return readlist

def readfastaweight( infile):
 """Read (sequence, weight) records from a multi-line fasta file.

 Every header has the form '>' followed by an integer weight. The sequence
 is the concatenation of the following lines up to the next header, a
 blank line or the end of the file. Reading stops at the first line that
 is not a header of at least two characters (for example a blank line).

 Raises ValueError when a header does not carry an integer.

 Line terminators are stripped per line, so an empty file yields [] instead
 of raising IndexError and a final line without a newline keeps its last
 character. The file is closed even when parsing fails.
 """
 readlist=[]
 handle=open(infile, 'r' )
 try:
  r=handle.readline().rstrip('\r\n')
  while len(r) >= 2:
   weight=int(r[1:])
   parts=[]
   r=handle.readline().rstrip('\r\n')
   while r and r[0] != '>':
    parts.append(r)
    r=handle.readline().rstrip('\r\n')
   readlist.append((''.join(parts),weight))
 finally:
  handle.close()
 return readlist

def readfaweight( infile):
 """Read (sequence, weight) records from a file with two lines per record.

 Each record is a header line '>' + integer weight followed by exactly one
 sequence line (the layout writefaweight() produces). Reading stops at the
 end of the file or at a header shorter than two characters.

 Raises ValueError when a header does not carry an integer.

 The loop used to keep the raw sequence line, newline included, and then
 try to parse that same line as the next header, which raised ValueError
 for any real sequence. Each iteration now consumes the header and the
 sequence and stores the sequence without its line terminator. An empty
 file yields [] and the file is closed even when parsing fails.
 """
 readlist=[]
 handle=open(infile, 'r' )
 try:
  r=handle.readline().rstrip('\r\n')
  while len(r) >= 2:
   weight=int(r[1:])
   s=handle.readline().rstrip('\r\n')
   readlist.append((s,weight))
   r=handle.readline().rstrip('\r\n')
 finally:
  handle.close()
 return readlist

def writefaweight( infile, nodelist):
 """Write the nodes to infile as '>weight' / sequence line pairs.

 Every element of nodelist must offer .weight and .sequence. The file is
 truncated first; an empty nodelist produces a file holding one newline.
 """
 out=open(infile, 'w+' )
 records=[]
 for item in nodelist:
  records.append('>'+str(item.weight)+'\n'+str(item.sequence))
 out.write('\n'.join(records)+'\n')
 out.close()

def writelist( infile, l):
 """Write str() of every element of l to infile, one element per line.

 The file is truncated first; an empty list produces a single newline.
 """
 out=open(infile, 'w+' )
 rows=[]
 for element in l:
  rows.append(str(element))
 out.write('\n'.join(rows)+'\n')
 out.close()

def readint( infile):
 """Read one integer per line from infile and return them as a list.

 Reading stops at the first blank line or at the end of the file.
 Raises ValueError when a line is not an integer.

 Line terminators are stripped per line, so an empty file yields [] instead
 of raising IndexError and a final line without a newline keeps its last
 digit. The file is closed even when parsing fails.
 """
 readlist=[]
 handle=open(infile, 'r' )
 try:
  r=handle.readline().rstrip('\r\n')
  while r:
   readlist.append(int(r))
   r=handle.readline().rstrip('\r\n')
 finally:
  handle.close()
 return readlist

def writeint( infile, intlist):
 """Write the values of intlist to infile, one per line.

 The file is truncated first; an empty list produces a single newline.
 """
 out=open(infile, 'w+' )
 rows=[str(value) for value in intlist]
 out.write('\n'.join(rows)+'\n')
 out.close()

#sequence() and reverse() ignore self.direction; getsequence() and getreverse() take it into account
#class kmerseed:
# def __init__(self, kmer, i):
#  self.kmer= kmer
#  if i <= p_mer:
#   self.direction =True
#   self.index=i
#  else:
#   self.direction =False
#   self.index=i- (p_mer+1)
# def getsequence(self):
#  if self.direction:
#   s=self.kmer.sequence()
#  else:
#   s=self.kmer.reverse()
#  b=self.index*i_mer
#  return s[b: (b+i_mer)]

class kmer:
 """A window of k_mer characters starting at a given index of a read.

 'direction' says which strand the window is currently read from:
 True for the read's sequence, False for its reverse.
 """
 def __init__(self, read, i):
  self.read= read
  self.index=i
  self.fresh=True
  self.direction=True
  self.weight=read.weight
  self.node=None
 def getseedsequencelist(self,direction=True):
  # Cut the direction-dependent window into p_mer+1 consecutive seeds of
  # i_mer characters, each tagged with its position number.
  # The 'direction' argument is not consulted; self.direction decides.
  window=self.getsequence()
  seeds=[]
  for i in range(p_mer+1):
   start=i*i_mer
   seeds.append(window[start:start+i_mer]+str(i))
  return seeds
 def sequence(self):
  # Window on the read's sequence, whatever self.direction is.
  start=self.index
  return self.read.sequence[start:start+k_mer]
 def getsequence(self):
  # Window on the strand selected by self.direction.
  if self.direction:
   return self.sequence()
  return self.reverse()
 def reverse(self):
  # The same window located on the read's reverse string.
  strand=self.read.reverse
  end=len(strand) -self.index
  return strand[end-k_mer:end]
 def getreverse(self):
  # Window on the strand opposite to the one selected by self.direction.
  if self.direction:
   return self.reverse()
  return self.sequence()
 def getweight(self):
  # Slice of the global per-position weight table covering this window.
  start=self.index
  return kmerweightlist[start:start+k_mer]

class sread:
 """Minimal read: the sequence plus the number of k_mer windows it holds."""
 def __init__(self,s):
  self.sequence=s
  # number of k_mer-long windows that fit into the sequence
  self.kl=len(s) +1 -k_mer

class read:
 """A sequencing read together with its reverse strand and k-mer slots."""
 def __init__(self,s,weight=1):
  self.sequence=s
  self.reverse=self.getreverse()
  # number of k_mer-long windows that fit into the sequence
  self.kl=len(s) +1 -k_mer
  self.weight=weight
  # one placeholder slot per window
  self.kmerlist =[0]*self.kl
 def kmersequence( self,i):
  # Window number i of the sequence.
  return self.sequence[i:i+k_mer]
 def kmerreverse( self,i):
  # The window of the reverse strand that covers the same bases as window i.
  start=self.kl -1 -i
  return self.reverse[start:start+k_mer]
 def getreverse( self):
  # Walk the sequence backwards and map every base through rdna.
  complemented=[rdna[base] for base in reversed(self.sequence)]
  return ''.join(complemented)

class snode:
 """Lightweight node: a sequence, a weight and a contig reference pair."""
 def __init__(self, s=None):
  self.contiginfo=(None,None)
  self.weight=0
  self.sequence=s

class node:
 # A cluster of kmer objects handled as one unit. It keeps the kmers, their
 # summed weight and a consensus sequence, and can split itself recursively
 # when the kmers disagree too much at some position.
 def __init__(self, kmerlist = None):
  # kmerlist: optional list of kmer objects; a falsy value gives a new empty list.
  if kmerlist:
   self.kmerlist=kmerlist
  else:
   self.kmerlist=[]
  self.weight=0
  self.sequence=''
#  self.list=None
#  self.direction=True
  self.fresh=1
  self.contiginfo=(None,None)
#  self.contigid=-1
#  self.contigindex =0
#  self.mergednode=None
 def getreverse( self):
  # Consensus sequence read backwards with every base mapped through rdna.
  return ''.join([ rdna[d] for d in self.sequence[::-1] ])
# def reverselist(self):
#  l=self.list
#  ll=len(l)
#  l=[l[i] for i in range(ll-1,-1,-1)]
#  for i in range(ll):
#   tl=l[i]
#   temp=tl[0]
#   tl[0]=tl[3]
#   tl[3]=temp
#   temp=tl[1]
#   tl[1]=tl[2]
#   tl[2]=temp
#  self.list=l
# def mergenode(self,n):
#  if self.direction != n.direction:
#   n.reverselist()
#  sl=self.list
#  nl=n.list
#  for i in range(len(sl)):
#   tsl=sl[i]
#   tnl=nl[i]
#   tsl[0]+=tnl[0]
#   tsl[1]+=tnl[1]
#   tsl[2]+=tnl[2]
#   tsl[3]+=tnl[3]
#  n.mergednode=self
# def smergenode(self,n):
#  n.mergednode=self
 def reducekmer(self):
  # Point the .node attribute of every kmer in this cluster back at this node.
  # (The single-kmer case is handled separately but does the same thing.)
  skl=self.kmerlist
  lkl= len(skl)
  if lkl==1:
   skl[0].node=self
   return
  for i in range(lkl):
   skl[i].node=self
  return
 def addkmer(self, kmer):
  # Append one kmer to the cluster.
  self.kmerlist.append(kmer)
 def popkmer(self, i):
  # Remove and return the kmer at index i.
  return self.kmerlist.pop(i)
 def findsequence1(self):
  # Compute self.weight, the per-position base counts (self.list) and the
  # consensus self.sequence from the kmers. Returns False for an empty
  # cluster, True otherwise.
  lkl=len(self.kmerlist)
  if lkl==0:
   return False
  self.weight=sum([self.kmerlist[i].weight for i in range(lkl)])
  l=k_mer
  # self.list[j][base] = total weight of the kmers showing 'base' at position j
  self.list=[{'A':0,'C':0,'G':0,'T':0} for i in range(l) ]
  for i in range(lkl):
   s=self.kmerlist[i].getsequence()
   kmerweight=self.kmerlist[i].weight
   for j in range(l):
    self.list[j][s[j]] +=kmerweight
  # consensus: heaviest base per position; ties keep the earlier of A,C,G,T
  s=['A' for i in range(l)]
  for i in range(l):
   for d in ['C', 'G', 'T']:
    if self.list[i][d] > self.list[i][s[i]]:
     s[i]=d
  self.sequence= ''.join(s)
#  print(self.sequence)
#  print(self.list)
  return True
 def finddifference1( self):
  # Using the counts left in self.list by findsequence1(), return the first
  # (position, base) whose non-consensus base is supported strongly enough,
  # or (-1,'N') when there is none or the cluster weighs less than idealnumber
  # (with idealerrorfraction > 0).
  if (idealerrorfraction>0)and(self.weight< idealnumber):
   return (-1,'N')
  for i in range(len(self.sequence)):
   for d in ['A','C','G','T']:
    if d != self.sequence[i]:
     # strong enough: passes the absolute/relative error thresholds, or
     # reaches 0.6 of the consensus base's weight at this position
     if ((idealerrorfraction==0 or self.list[i][d]>= idealnumber) and float(self.list[i][d])/float(self.weight) >= idealerrorfraction ) or self.list[i][d] >= (float(self.list[i][self.sequence[i]])*0.6):
      return (i,d)
  return (-1,'N')
 def findsplit(self):
  # Decide whether the cluster has to be split.
  # Returns (position, base): the kmers showing 'base' at 'position' should be
  # moved to a new node; (-1,'N') means no split is needed.
  # Side effects: sets self.weight, and sets self.sequence on every path except
  # the two-kmer paths that return a split position.
  skl=self.kmerlist
  lkl=len(skl)
  # one kmer: it is its own consensus
  if lkl==1:
   self.weight=skl[0].weight
   self.sequence=skl[0].getsequence()
   return (-1,'N')
  l=k_mer
  # two kmers: compare them directly instead of building count tables
  if lkl==2:
   s0w=skl[0].weight
   s1w=skl[1].weight
   self.weight=s0w+s1w
   selfweight= self.weight
   if s0w> s1w:
    # kmer 0 dominates; split at the first mismatch if kmer 1 is strong enough
    s0s=skl[0].getsequence()
    if (idealerrorfraction==0 or selfweight >= idealnumber) and (((idealerrorfraction==0 or s1w>= idealnumber) and s1w/float(selfweight) >= idealerrorfraction ) or s1w >= int(s0w*splitparameter)):
     s1s=skl[1].getsequence()
     for i in range(l):
      if s1s[i] != s0s[i]:
       return (i,s1s[i])
    self.sequence=s0s
    return (-1,'N')
   elif s0w< s1w:
    # mirror image: kmer 1 dominates
    s1s=skl[1].getsequence()
    if (idealerrorfraction==0 or selfweight >= idealnumber) and (((idealerrorfraction==0 or s0w>= idealnumber) and s0w/float(selfweight) >= idealerrorfraction ) or s0w >= int(s1w*splitparameter)):
     s0s=skl[0].getsequence()
     for i in range(l):
      if s0s[i] != s1s[i]:
       return (i,s0s[i])
    self.sequence=s1s
    return (-1,'N')
   else:
    # equal weights
    s0s=skl[0].getsequence()
    s1s=skl[1].getsequence()
    if (idealerrorfraction==0 or selfweight >= idealnumber) and (((idealerrorfraction==0 or s0w>= idealnumber) and s0w/float(selfweight) >= idealerrorfraction ) or s0w >= int(s1w*splitparameter)):
     # split at the first mismatch, moving the alphabetically larger base
     for i in range(l):
      if s0s[i]!=s1s[i]:
       return (i,max([s0s[i],s1s[i]]))
     self.sequence=s0s
     return (-1,'N')
    # no split: consensus takes the alphabetically smaller base at mismatches
    s=[0]*l
    for i in range(l):
     if s0s[i]==s1s[i]:
      s[i]=s0s[i]
     else:
      s[i]=min([s0s[i],s1s[i]])
    self.sequence=''.join(s)
    return (-1,'N')
  # three or more kmers: accumulate per-position base weights
  rd=('C','G','T')
  self.weight=sum([skl[i].weight for i in range(lkl)])
  # dl is the module-level scratch table dlist, zeroed and reused on each call
  dl=dlist
  for d in dl.values():
   for i in range(l):
    d[i]=0
#  dl={'A':[0]*l,'C':[0]*l,'G':[0]*l,'T':[0]*l}
  for k in skl:
   s=k.getsequence()
   kmerweight=k.weight
   for j in range(l):
    dl[s[j]][j] +=kmerweight
  # consensus: heaviest base per position; ties keep the earlier of A,C,G,T
  s=['A']*l
  for j in range(l):
   for d in rd:
    if dl[d][j] > dl[s[j]][j]:
     s[j]=d
  selfsequence= ''.join(s)
  self.sequence=selfsequence
  selfweight=self.weight
  # clusters lighter than idealnumber are never split (when idealerrorfraction > 0)
  if idealerrorfraction>0 and selfweight< idealnumber:
   return (-1,'N')
  for i in range(l):
   ssi= selfsequence[i]
   # ds: weight a minority base needs relative to the consensus base
   ds=int(dl[ssi][i]*splitparameter)
   for d in itd:
    ld=dl[d][i]
    if d != ssi and ld:
     if ((idealerrorfraction==0 or ld>= idealnumber) and ld/float(selfweight) >= idealerrorfraction ) or ld >= ds:
      return (i,d)
  return (-1,'N')
 def splitnode(self):
  # Recursively split this cluster until findsplit() reports no further split.
  # Returns the list of resulting nodes; this node stays in the list and keeps
  # the kmers that were not moved out.
#  if not self.findsequence():
#   print('error at findsequence()')
#   sys.exit(1)
#   return []
  (p,d) = self.findsplit()
#notice, be care
#  print((p,d))
#  print(self.sequence)
#  print(self.list)
#  del self.list
  if p<0:
   # nothing to split: register this node on its kmers and stop
   self.reducekmer()
   return [self]
  a=node()
  skl=self.kmerlist
#  if not isinstance(skl ,list):
#   self.kmerlist=list(skl)
  # walk backwards so that popping does not shift the indices still to visit
  for i in range(len(skl)-1,-1,-1):
   if skl[i].getsequence()[p] == d:
    a.addkmer( self.popkmer(i)) 
  s=self.splitnode()
  s.extend(a.splitnode())
  return s

#class contigseed:
# def __init__(self, contig, i):
#  self.contig= contig
#  if i==0:
#   self.direction =True
#  else:
#   self.direction =False
# def getsequence(self):
#  if self.direction:
#   s=self.contig.sequence
#  else:
#   s=self.contig.reverse
#  return s[0: i_mer]

class scontig:
 """Contig whose nodelist entries are indexable records (entry[0] is summed as a weight).

 Attributes set by __init__:
  sequence / reverse  forward string and its rdna-mapped reversed string
  sum / weight        sum of entry[0] over nodelist, and that sum divided by len(nodelist)
  nodelist            the list passed in (kept by reference, not copied)
  fresh, id           initialised to True and -1
  pre / next          lists of neighbouring contigs on either side
  predirection / nextdirection
                      one flag per neighbour, parallel to pre / next
 """
 def __init__(self,nodelist, sequence):
  """Store sequence and nodelist and derive sum, weight and reverse.

  Raises ZeroDivisionError when nodelist is empty (weight divides by its length).
  """
  self.sequence=sequence
  self.sum=sum([nodetemp[0] for nodetemp in nodelist ])
  self.weight=self.sum/ float(len(nodelist))
  self.nodelist =nodelist
  self.reverse= self.getreverse()
  self.fresh = True
  self.id=-1
#  self.list=None
  self.pre = []
  self.predirection = []
  self.next = []
  self.nextdirection = []
 def getreverse( self):
  """Return self.sequence reversed, with every character mapped through the global rdna table."""
  return ''.join([ rdna[d] for d in self.sequence[::-1] ])
# def getcontigseedsequencelist( self):
#  return [ self.contigseedlist[i].getsequence() for i in range(len(contigseedlist)) ]
 def flick(self):
  """Swap sequence with reverse and reverse nodelist in place.

  pre/next and their direction lists are not touched here.
  """
  temp=self.sequence
  self.sequence= self.reverse
  self.reverse =temp
  self.nodelist.reverse()
#  for i in range(len(self.nodelist)):
#   self.nodelist[i].direction = not self.nodelist[i].direction
 def extendnext(self, current, samedirection = True,distance =1):
  """Absorb the single successor if the link is unambiguous, else re-point back links.

  Merge case: self has exactly one successor, that successor has exactly
  one link on the side facing self, and it is not self.  Its outgoing links,
  sequence, reverse, nodelist and sum are folded into self and the tuple
  (successor, direction flag) is returned.

  Otherwise: for every successor, the back link located by findpair(...,
  current, ...) is replaced by self (and, when samedirection is false, its
  direction flag is overwritten); None is returned.

  current        contig the successors' back links currently refer to
  samedirection  selects which flag value findpair is asked for
  distance       when > 1, only k_mer-distance characters of overlap are
                 dropped and distance-1 placeholder entries are appended
  NOTE(review): findpair is defined elsewhere; presumably it returns the
  index of the (contig, direction) pair - confirm.
  """
  if len(self.next) == 1:
   next = self.next[0]
   nextdirection= self.nextdirection[0]
   # Pick the successor's link list that faces self.
   if nextdirection:
    pre = next.pre
   else:
    pre = next.next
   if len(pre)==1:
    if next is not self:
     if nextdirection:
      # The successor's lists are taken over by reference (no copy).
      self.next=next.next
      self.nextdirection=next.nextdirection
     else:
      # Opposite orientation: flip the successor first, then its former
      # predecessors become our successors with inverted flags.
      next.flick()
      self.next=next.pre
      self.nextdirection= [ not direction for direction in next.predirection ]
     if distance >1:
      kmd= k_mer -distance
      if kmd:
       self.sequence= self.sequence + next.sequence[kmd:]
       self.reverse= next.reverse[:-kmd] + self.reverse
      else:
       # kmd == 0: [:-0] would be empty, so the whole strings are used.
       self.sequence= self.sequence + next.sequence[:]
       self.reverse= next.reverse[:] + self.reverse
      # Placeholder records for the distance-1 skipped positions.
      for i in range(distance -1):
       self.nodelist.append( [0,None,None])
     else:
      kmm= k_mer -1
      self.sequence= self.sequence + next.sequence[kmm:]
      self.reverse= next.reverse[:-kmm] + self.reverse
     lnextnode=len(next.nodelist)
     if lnextnode ==1:
      self.nodelist.append(next.nodelist[0])
     else:
      self.nodelist.extend(next.nodelist)
     # Only sum is accumulated; self.weight is not recomputed here.
     self.sum +=next.sum
     return (next,nextdirection)
  # No merge possible: make the successors point back at self instead of current.
  next=self.next
  nextdirection=self.nextdirection
  if samedirection:
   for i in range(len(next)):
    ni=next[i]
    if nextdirection[i]:
     ni.pre[ findpair(ni.pre,ni.predirection,current,True)] = self
    else:
     ni.next[ findpair(ni.next,ni.nextdirection,current,False)] = self
  else:
   for i in range(len(next)):
    ni=next[i]
    if nextdirection[i]:
     d= findpair(ni.pre,ni.predirection,current,False)
     ni.pre[d] = self
#     ni.predirection[d] = not ni.predirection[d]
     ni.predirection[d] =True
    else:
     d= findpair(ni.next,ni.nextdirection,current,True)
     ni.next[d] = self
#     ni.nextdirection[d] = not ni.nextdirection[d]
     ni.nextdirection[d] = False
  return None
 def extendpre(self, current, samedirection = True,distance =1):
  """Mirror image of extendnext for the predecessor side.

  Merge case: exactly one predecessor whose facing link list has exactly one
  entry and which is not self.  Its links, sequence, reverse, nodelist and
  sum are folded in at the front and (predecessor, direction flag) is
  returned.  Otherwise the predecessors' back links found via findpair(...,
  current, ...) are re-pointed at self and None is returned.
  """
  if len(self.pre) == 1:
   pre = self.pre[0]
   predirection= self.predirection[0]
   # Pick the predecessor's link list that faces self.
   if predirection:
    next = pre.next
   else:
    next = pre.pre
   if len(next)==1:
    if pre is not self:
     if predirection:
      # The predecessor's lists are taken over by reference (no copy).
      self.pre=pre.pre
      self.predirection=pre.predirection
     else:
      pre.flick()
      self.pre=pre.next
      self.predirection= [ not direction for direction in pre.nextdirection ]
     if distance > 1:
      kmd= k_mer -distance
      if kmd:
       self.sequence= pre.sequence[:-kmd]+self.sequence
       self.reverse= self.reverse + pre.reverse[kmd:]
      else:
       # kmd == 0: [:-0] would be empty, so the whole strings are used.
       self.sequence= pre.sequence[:]+self.sequence
       self.reverse= self.reverse + pre.reverse[:]
      # Placeholder records for the distance-1 skipped positions.
      for i in range(distance -1):
       self.nodelist.insert( 0,[0,None,None])
     else:
      kmm= k_mer -1
      self.sequence= pre.sequence[:-kmm]+self.sequence
      self.reverse= self.reverse + pre.reverse[kmm:]
     lprenode=len(pre.nodelist)
     if lprenode==1:
      self.nodelist.insert(0,pre.nodelist[0])
     else:
#      pre.nodelist.reverse()
      self.nodelist[:0]=pre.nodelist
     # Only sum is accumulated; self.weight is not recomputed here.
     self.sum +=pre.sum
     return (pre,predirection)
  # No merge possible: make the predecessors point forward at self instead of current.
  pre=self.pre
  predirection=self.predirection
  if samedirection:
   for i in range(len(pre)):
    pi=pre[i]
    if predirection[i]:
     pi.next[ findpair(pi.next,pi.nextdirection,current,True)] = self
    else:
     pi.pre[ findpair(pi.pre,pi.predirection,current,False)] = self
  else:
   for i in range(len(pre)):
    pi=pre[i]
    if predirection[i]:
     d= findpair(pi.next,pi.nextdirection,current,False)
     pi.next[d] = self
#     pi.nextdirection[d] = not pi.nextdirection[d]
     pi.nextdirection[d] =True 
    else:
     d= findpair(pi.pre,pi.predirection,current,True)
     pi.pre[d] = self
#     pi.predirection[d] = not pi.predirection[d]
     pi.predirection[d] = False
  return None
# def findlist(self):
#  self.list=[{'A':0.0,'C':0.0,'G':0.0,'T':0.0} for i in range(len(self.sequence)) ]
#  for i in range(len(self.nodelist)):
#   node= self.nodelist[i]
#   if node is not None:
#    for j in range(len(node.kmerlist)):
#     kmer=node.kmerlist[j]
#     weight= kmer.getweight()
#     if node.direction:
#      s=kmer.getsequence()
#     else:
#      s=kmer.getreverse()
#     for k in range(k_mer):
#      self.list[i+k][s[k]] = self.list[i+k][s[k]] +weight[k]
#  for i in range(len(self.list)):
#   self.list[i]['N'] = self.list[i]['A']+ self.list[i]['C']+ self.list[i]['G']+ self.list[i]['T']

class contig:
 """Contig built from one node or from a list of nodes.

 Attributes set by __init__:
  sequence / reverse  forward string and its rdna-mapped reversed string
  sum / weight        accumulated node weight and per-node weight
  nodelist            list of node objects making up the contig
  fresh, id           initialised to True and -1
  pre / next          lists of neighbouring contigs on either side
  predirection / nextdirection
                      one flag per neighbour, parallel to pre / next
 """
 def __init__(self,node, multisequence =False):
  """Build the contig.

  multisequence falsy  -> `node` is a single node; its sequence is copied,
                          sum is weight * (len(sequence)+1-k_mer).
  multisequence truthy -> `node` is a list of nodes and multisequence is
                          the contig sequence; sum is the total node weight
                          and weight is the mean (ZeroDivisionError on an
                          empty list).
  """
  if multisequence:
   self.sequence=multisequence
   nl=len(node)
   self.sum=sum([node[i].weight for i in range(nl)])
   self.weight=self.sum/ float(nl)
   self.nodelist =node
#   for i in range(nl):
#    ni=node[i]
#    del ni.sequence
#    del ni.weight
#    del ni.direction
#    del ni.fresh
#    del ni.kmerlist
  else:
   self.sequence=node.sequence[:]
   self.sum=node.weight*(len(self.sequence)+1-k_mer)
   self.weight=node.weight
   self.nodelist =[node]
#   del node.sequence
#   del node.weight
#   del node.direction
#   del node.fresh
#   del node.kmerlist
  self.reverse= self.getreverse()
  self.fresh = True
  self.id=-1
#  self.list=None
  self.pre = []
  self.predirection = []
  self.next = []
  self.nextdirection = []
 def getreverse( self):
  """Return self.sequence reversed, with every character mapped through the global rdna table."""
  return ''.join([ rdna[d] for d in self.sequence[::-1] ])
# def getcontigseedsequencelist( self):
#  return [ self.contigseedlist[i].getsequence() for i in range(len(contigseedlist)) ]
 def flick(self):
  """Swap sequence with reverse and reverse nodelist in place.

  pre/next and their direction lists are not touched here.
  """
  temp=self.sequence
  self.sequence= self.reverse
  self.reverse =temp
  self.nodelist.reverse()
#  for i in range(len(self.nodelist)):
#   self.nodelist[i].direction = not self.nodelist[i].direction
 def extendnext(self, current, samedirection = True,distance =1):
  """Absorb the single successor if the link is unambiguous, else re-point back links.

  Merge case: self has exactly one successor, that successor has exactly
  one link on the side facing self, and it is not self.  Its outgoing links,
  sequence, reverse, nodelist and sum are folded into self and the tuple
  (successor, direction flag) is returned.

  Otherwise: for every successor, the back link located by findpair(...,
  current, ...) is replaced by self (and, when samedirection is false, its
  direction flag is overwritten); None is returned.

  current        contig the successors' back links currently refer to
  samedirection  selects which flag value findpair is asked for
  distance       when > 1, only k_mer-distance characters of overlap are
                 dropped and distance-1 empty node() objects are appended
  NOTE(review): findpair is defined elsewhere; presumably it returns the
  index of the (contig, direction) pair - confirm.
  """
  if len(self.next) == 1:
   next = self.next[0]
   nextdirection= self.nextdirection[0]
   # Pick the successor's link list that faces self.
   if nextdirection:
    pre = next.pre
   else:
    pre = next.next
   if len(pre)==1:
    if next is not self:
     if nextdirection:
      # The successor's lists are taken over by reference (no copy).
      self.next=next.next
      self.nextdirection=next.nextdirection
     else:
      # Opposite orientation: flip the successor first, then its former
      # predecessors become our successors with inverted flags.
      next.flick()
      self.next=next.pre
      self.nextdirection= [ not direction for direction in next.predirection ]
     if distance >1:
      kmd= k_mer -distance
      if kmd:
       self.sequence= self.sequence + next.sequence[kmd:]
       self.reverse= next.reverse[:-kmd] + self.reverse
      else:
       # kmd == 0: [:-0] would be empty, so the whole strings are used.
       self.sequence= self.sequence + next.sequence[:]
       self.reverse= next.reverse[:] + self.reverse
      # Placeholder nodes for the distance-1 skipped positions.
      for i in range(distance -1):
       self.nodelist.append( node())
     else:
      kmm= k_mer -1
      self.sequence= self.sequence + next.sequence[kmm:]
      self.reverse= next.reverse[:-kmm] + self.reverse
     lnextnode=len(next.nodelist)
     if lnextnode ==1:
      self.nodelist.append(next.nodelist[0])
     else:
      self.nodelist.extend(next.nodelist)
     # Only sum is accumulated; self.weight is not recomputed here.
     self.sum +=next.sum
     return (next,nextdirection)
  # No merge possible: make the successors point back at self instead of current.
  next=self.next
  nextdirection=self.nextdirection
  if samedirection:
   for i in range(len(next)):
    ni=next[i]
    if nextdirection[i]:
     ni.pre[ findpair(ni.pre,ni.predirection,current,True)] = self
    else:
     ni.next[ findpair(ni.next,ni.nextdirection,current,False)] = self
  else:
   for i in range(len(next)):
    ni=next[i]
    if nextdirection[i]:
     d= findpair(ni.pre,ni.predirection,current,False)
     ni.pre[d] = self
#     ni.predirection[d] = not ni.predirection[d]
     ni.predirection[d] =True
    else:
     d= findpair(ni.next,ni.nextdirection,current,True)
     ni.next[d] = self
#     ni.nextdirection[d] = not ni.nextdirection[d]
     ni.nextdirection[d] = False
  return None
 def extendpre(self, current, samedirection = True,distance =1):
  """Mirror image of extendnext for the predecessor side.

  Merge case: exactly one predecessor whose facing link list has exactly one
  entry and which is not self.  Its links, sequence, reverse, nodelist and
  sum are folded in at the front and (predecessor, direction flag) is
  returned.  Otherwise the predecessors' back links found via findpair(...,
  current, ...) are re-pointed at self and None is returned.
  """
  if len(self.pre) == 1:
   pre = self.pre[0]
   predirection= self.predirection[0]
   # Pick the predecessor's link list that faces self.
   if predirection:
    next = pre.next
   else:
    next = pre.pre
   if len(next)==1:
    if pre is not self:
     if predirection:
      # The predecessor's lists are taken over by reference (no copy).
      self.pre=pre.pre
      self.predirection=pre.predirection
     else:
      pre.flick()
      self.pre=pre.next
      self.predirection= [ not direction for direction in pre.nextdirection ]
     if distance > 1:
      kmd= k_mer -distance
      if kmd:
       self.sequence= pre.sequence[:-kmd]+self.sequence
       self.reverse= self.reverse + pre.reverse[kmd:]
      else:
       # kmd == 0: [:-0] would be empty, so the whole strings are used.
       self.sequence= pre.sequence[:]+self.sequence
       self.reverse= self.reverse + pre.reverse[:]
      # Placeholder nodes for the distance-1 skipped positions.
      for i in range(distance -1):
       self.nodelist.insert( 0,node())
     else:
      kmm= k_mer -1
      self.sequence= pre.sequence[:-kmm]+self.sequence
      self.reverse= self.reverse + pre.reverse[kmm:]
     lprenode=len(pre.nodelist)
     if lprenode==1:
      self.nodelist.insert(0,pre.nodelist[0])
     else:
#      pre.nodelist.reverse()
      self.nodelist[:0]=pre.nodelist
     # Only sum is accumulated; self.weight is not recomputed here.
     self.sum +=pre.sum
     return (pre,predirection)
  # No merge possible: make the predecessors point forward at self instead of current.
  pre=self.pre
  predirection=self.predirection
  if samedirection:
   for i in range(len(pre)):
    pi=pre[i]
    if predirection[i]:
     pi.next[ findpair(pi.next,pi.nextdirection,current,True)] = self
    else:
     pi.pre[ findpair(pi.pre,pi.predirection,current,False)] = self
  else:
   for i in range(len(pre)):
    pi=pre[i]
    if predirection[i]:
     d= findpair(pi.next,pi.nextdirection,current,False)
     pi.next[d] = self
#     pi.nextdirection[d] = not pi.nextdirection[d]
     pi.nextdirection[d] =True 
    else:
     d= findpair(pi.pre,pi.predirection,current,True)
     pi.pre[d] = self
#     pi.predirection[d] = not pi.predirection[d]
     pi.predirection[d] = False
  return None
# def findlist(self):
#  self.list=[{'A':0.0,'C':0.0,'G':0.0,'T':0.0} for i in range(len(self.sequence)) ]
#  for i in range(len(self.nodelist)):
#   node= self.nodelist[i]
#   if node is not None:
#    for j in range(len(node.kmerlist)):
#     kmer=node.kmerlist[j]
#     weight= kmer.getweight()
#     if node.direction:
#      s=kmer.getsequence()
#     else:
#      s=kmer.getreverse()
#     for k in range(k_mer):
#      self.list[i+k][s[k]] = self.list[i+k][s[k]] +weight[k]
#  for i in range(len(self.list)):
#   self.list[i]['N'] = self.list[i]['A']+ self.list[i]['C']+ self.list[i]['G']+ self.list[i]['T']

def getkmerweight(l):
 """Return, for a sequence of length l, the list of per-position weights 1.0/c.

 c is the number of k_mer-long windows (start 0 .. l-k_mer) that cover the
 position.  A position covered by no window (l < k_mer) raises
 ZeroDivisionError, exactly as a division by the zero count would.
 """
 coverage=[0]*l
 for start in range(1+len(coverage)-k_mer):
  for position in range(start,start+k_mer):
   coverage[position]+=1
 return [1.0/count for count in coverage]

def distance( s1, s2):
 """Count the indices i < len(s1) at which s1[i] differs from s2[i].

 s2 must be at least as long as s1 (IndexError otherwise); anything in s2
 beyond len(s1) is ignored.
 """
 return sum([1 for i in range(len(s1)) if s1[i] != s2[i]])

def withindistance( s1, s2, distance):
 """Tell whether s1 and s2 differ in at most `distance` positions.

 With distance == 0 the two values are compared with ==.  Otherwise the
 first len(s1) positions are scanned and the scan stops at the first
 mismatch that exceeds the allowance.
 """
 if distance == 0:
  return s1 == s2
 remaining = distance
 for i in range(len(s1)):
  if s1[i] == s2[i]:
   continue
  remaining -= 1
  if remaining < 0:
   return False
 return True

def readsetkmer2(readpool):
 """Collect, for every read, the node of its first and of its last k-mer.

 Returns a flat list [first0, last0, first1, last1, ...] built from
 read.kmerlist[0].node and read.kmerlist[-1].node.
 """
 ends=[]
 for entry in readpool:
  ends.append(entry.kmerlist[0].node)
  ends.append(entry.kmerlist[-1].node)
 return ends

def readsetkmer(readpool):
 """Collect, for every read, the node of its first and of its last k-mer.

 Output position i belongs to read i//2; even positions take kmerlist[0]
 (index -0) and odd positions take kmerlist[-1].  Floor division is spelled
 out with // so the index stays an integer regardless of the division mode
 in effect (classic `/` yields a float index under true division).
 """
 return [readpool[i//2].kmerlist[-(i%2)].node for i in range(2*len(readpool)) ]

def sbuildkpgraph2( readpool ):
 """Share one kmer object between all reads that contain the same k_mer-long window.

 For window j of a read, the matching window of read.reverse is looked up
 first and the forward window read.sequence[j:j+k_mer] second.  A hit stores
 the existing kmer in read.kmerlist[j] and adds the read's weight to it; a
 miss creates kmer(read, j), registers it under the forward window and
 stores it.

 Returns the list of newly created kmer objects in creation order.
 """
 seen={}
 created=[]
 for entry in readpool:
  last=entry.kl-1
  for j in range(entry.kl):
   start=last-j
   key=entry.reverse[start:start+k_mer]
   if key not in seen:
    # Fall back to the forward window; it is also the registration key.
    key=entry.sequence[j:j+k_mer]
   if key in seen:
    shared=seen[key]
    entry.kmerlist[j]=shared
    shared.weight += entry.weight
    continue
   fresh=kmer(entry,j)
   seen[key]=fresh
   entry.kmerlist[j]=fresh
   created.append(fresh)
 return created

def sbuildkpgraph( readpool ):
 """Share one kmer object between all reads that contain the same k_mer-long window.

 Window j of a read is looked up by its read.reverse counterpart first and
 by read.sequence[j:j+k_mer] second.  A hit reuses the registered kmer and
 adds the read's weight to it; a miss creates kmer(read, j) and registers it
 under the forward window.  Either way read.kmerlist[j] is filled in.

 Returns the kmerlist entries that were newly created, in creation order.
 """
 registry={}

 def lookup(entry, j):
  # Return the registered kmer for window j, or None when there is none.
  # NOTE: membership is tested with `in` so a falsy kmer is still a hit.
  start=(entry.kl-1)-j
  window=entry.reverse[start:start+k_mer]
  if window in registry:
   return registry[window]
  window=entry.sequence[j:j+k_mer]
  if window in registry:
   return registry[window]
  registry[window]=None
  del registry[window]
  return None

 kmerpool=[]
 for entry in readpool:
  for j in range(entry.kl):
   start=(entry.kl-1)-j
   window=entry.reverse[start:start+k_mer]
   if window not in registry:
    window=entry.sequence[j:j+k_mer]
   if window in registry:
    known=registry[window]
    entry.kmerlist[j]=known
    known.weight += entry.weight
   else:
    made=kmer(entry,j)
    registry[window]=made
    entry.kmerlist[j]=made
    kmerpool.append(entry.kmerlist[j])
 return kmerpool

def sbuildkpgraphtrans( readpool ):
 """Share one kmer object between all reads containing the same forward window.

 Only read.sequence[j:j+k_mer] is consulted (the reverse strand is not).
 A known window reuses its kmer and adds the read's weight to it; an unknown
 one creates kmer(read, j) and registers it.  read.kmerlist[j] is filled in
 either way.

 Returns the kmerlist entries that were newly created, in creation order.
 """
 registry={}
 kmerpool=[]
 for entry in readpool:
  for j in range(entry.kl):
   window=entry.sequence[j:j+k_mer]
   if window in registry:
    known=registry[window]
    entry.kmerlist[j]=known
    known.weight += entry.weight
    continue
   made=kmer(entry,j)
   registry[window]=made
   entry.kmerlist[j]=made
   kmerpool.append(entry.kmerlist[j])
 return kmerpool

#dg[key]=(base,base)->dg[key]=(base,limit)
def sdg(dg):
 """Turn a {key: count} dict into {key: (base, base)} and allocate the backing array.

 Keys are visited in the dict's iteration order; each key's base is the sum
 of the counts of the keys visited before it.  dg is modified in place.

 Returns (dg, [0] * sum_of_counts).
 """
 offset=0
 for key, count in list(dg.items()):
  dg[key]=(offset,offset)
  offset += count
 return (dg, [0]*offset)

#dg[key]= base, array[base]=base+1 ->dg[key]= base, array[base]=limit
def fdg(dg):
 """Turn a {key: count} dict into {key: base} plus a bucket array.

 Every key gets count+1 consecutive slots starting at `base`.  The first
 slot holds base+1, i.e. the index of the bucket's first payload slot; the
 remaining slots start out as 0.  Keys are visited in the dict's iteration
 order and dg is modified in place.

 Returns (dg, array) where len(array) == len(dg) + sum of the counts.
 """
 table=[0]*(len(dg)+sum(dg.values()))
 cursor=0
 for key in list(dg):
  span=dg[key]+1
  dg[key]=cursor
  table[cursor]=cursor+1
  cursor += span
 return (dg, table)

def obuildkpgraph( kmerpool ):
 """Group k-mers into nodes by seed lookup followed by a kpgraph.indistance check.

 1. Count every seed from kmer.getseedsequencelist() and lay the counts out
    with fdg(): dg[seed] is the bucket base, dga[base] the bucket's fill
    cursor, and the following slots hold the k-mers sharing that seed.
 2. For every k-mer still marked fresh, start a new node and grow it
    breadth-first: for each queued k-mer, the buckets of its forward seeds
    (getseedsequencelist(True)) and reverse seeds (getseedsequencelist(False))
    are scanned, candidates accepted by kpgraph.indistance(..., p_mer) are
    added to the node and queued, and the bucket is compacted.

 Sets kmer.fresh and kmer.direction on the k-mers as a side effect.
 Returns the list of nodes.
 """
 print 'k_mer :',k_mer
 print 'p_mer :',p_mer
 print 'i_mer :',i_mer
 # kpl is not used below.
 kpl=len(kmerpool)
#build readpool kmerpool dg
 print 'start kmer dg:',time.time()
 ksl=range(p_mer+1)
 dg={}
 # First pass: count how many k-mers carry each seed.
 for kmertemp in kmerpool:
  kmertemp.fresh =True
  for seedsequence in kmertemp.getseedsequencelist():
   if seedsequence in dg:
    dg[seedsequence]+=1
   else:
    dg[seedsequence] =1
 (dg,dga)=fdg(dg)
 # Second pass: drop each k-mer into its seed buckets; dga[dgs] is the
 # bucket's next free slot and ends up as the bucket limit.
 for kmertemp in kmerpool:
  for seedsequence in kmertemp.getseedsequencelist():
   dgs=dg[seedsequence]
   dga[dga[dgs]]=kmertemp
   dga[dgs]+=1
# build nodepool
 print 'start link kp graph and build node:',time.time()
 nodepool=[]
 pool=[]
# p =p_mer+1
 for i in range(len(kmerpool)):
  kmertemp=kmerpool[i]
  if not kmertemp.fresh:
   continue
  kmertemp.fresh=False
  pool.append(kmertemp)
  nodetemp= node()
  nodetemp.addkmer(kmertemp)
  nodepool.append(nodetemp)
  # reqpool is not used in this function.
  reqpool={}
  while len(pool):
   kmertemp = pool.pop(0)
   sequence=kmertemp.sequence()
   seedlist =kmertemp.getseedsequencelist(True)
   seedrlist= kmertemp.getseedsequencelist(False)
   # Forward seeds: candidates are compared by their forward sequence and
   # inherit the direction of the k-mer that found them.
   for r in ksl:
    seedr=seedlist[r]
    seedpool= dg.get(seedr)
    if seedpool is None:
     continue
    dgs=seedpool
    # Copy of the bucket: element 0 is the limit, the rest are k-mers.
    seedpool=dga[seedpool:dga[seedpool]]
    # Walk backwards (down to index 1) so pops do not disturb pending indices.
    for j in range(len(seedpool)-1,0,-1):
     if not seedpool[j].fresh:
      seedpool.pop(j)
      seedpool[0] -=1
      continue
     else:
      temp=seedpool[j]
      if True:
       if kpgraph.indistance(sequence,temp.sequence(),p_mer):
        temp.direction=kmertemp.direction
        temp.fresh=False
        pool.append(temp)
        nodetemp.addkmer(temp)
        seedpool.pop(j)
        seedpool[0] -=1
    # Write the compacted bucket (new limit + survivors) back.
    dga[dgs:seedpool[0]]=seedpool
   # Reverse seeds: candidates are compared by their reverse sequence and
   # receive the opposite direction.
   for r in ksl:
    seedr=seedrlist[r]
    seedpool= dg.get(seedr)
    if seedpool is None:
     continue
    dgs=seedpool
    seedpool=dga[seedpool:dga[seedpool]]
    for j in range(len(seedpool)-1,0,-1):
     if not seedpool[j].fresh:
      seedpool.pop(j)
      seedpool[0] -=1
      continue
     else:
      temp=seedpool[j]
      if True:
       if kpgraph.indistance(sequence,temp.reverse(),p_mer):
        temp.direction=not kmertemp.direction
        temp.fresh=False
        pool.append(temp)
        nodetemp.addkmer(temp)
        seedpool.pop(j)
        seedpool[0] -=1
    dga[dgs:seedpool[0]]=seedpool
 del dga
 del dg
 del kmerpool
 del pool
 return nodepool

def mapkpgraphpc( contigpool, nodepool ):
 sysfile1 = 'systemfilec1'+outname
 sysfile2 = 'systemfilec2'+outname
 sysfile3 = 'systemfilec3'+outname
 sysfile4 = 'systemfilec4'+outname
 sysfile5 = 'systemfilec5'+outname
 sysfile6 = 'systemfilec6'+outname
 sfile=open(sysfile1, 'w+' )
 if len(contigpool):
  sfile.write('\n'.join([temp.sequence for temp in contigpool])+'\n')
 sfile.close()
 sfile=open(sysfile2, 'w+' )
 if len(nodepool):
  sfile.write('\n'.join([temp.sequence for temp in nodepool])+'\n')
 sfile.close()
 print 'start kp graph',time.time()
 kpgraph.mapkpgraphc(k_mer,p_mer,i_mer, sysfile1, sysfile2, sysfile3, sysfile4, sysfile5, sysfile6 )
 print 'end kp graph',time.time()
 sysfile3=open(sysfile3, 'r' )
 tempf=sysfile3.read()
 tempf3=tempf.split(',')
 tempf3.pop()
 sysfile3.close()
 sysfile4=open(sysfile4, 'r' )
 tempf=sysfile4.read()
 tempf4=tempf.split(',')
 tempf4.pop()
 sysfile4.close()
 sysfile5=open(sysfile5, 'r' )
 tempf=sysfile5.read()
 tempf5=tempf.split(',')
 tempf5.pop()
 sysfile5.close()
 sysfile6=open(sysfile6, 'r' )
 tempf=sysfile6.read()
 tempf6=tempf.split(',')
 tempf6.pop()
 sysfile6.close()
 pool=tuple((contigpool[int(tempf3[i])],int(tempf4[i]),int(tempf5[i])) for i in range(len(tempf3)) )
 [setattr(nodepool[i],'contiginfo',pool[int(tempf6[i]):int(tempf6[i+1])]) for i in range(len(tempf6)-1) ]
 return nodepool
# def rsetattr(a,b):
#  setattr(a,'kmerlist',b)
#  return a
# pool=[rsetattr(pool[int(tempf2[i])],int(tempf3[i])) for i in range(len(kmerpool)) ]
# return [node(pool[int(tempf4[i]):int(tempf4[i+1])]) for i in range(len(tempf4)-1) ]

def mapkpgraphtranspc( contigpool, nodepool ):
 sysfile1 = 'systemfilec1'+outname
 sysfile2 = 'systemfilec2'+outname
 sysfile3 = 'systemfilec3'+outname
 sysfile4 = 'systemfilec4'+outname
 sysfile5 = 'systemfilec5'+outname
 sfile=open(sysfile1, 'w+' )
 if len(contigpool):
  sfile.write('\n'.join([temp.sequence for temp in contigpool])+'\n')
 sfile.close()
 sfile=open(sysfile2, 'w+' )
 if len(nodepool):
  sfile.write('\n'.join([temp.sequence for temp in nodepool])+'\n')
 sfile.close()
 print 'start kp graphtrans',time.time()
 kpgraph.mapkpgraphtransc(k_mer,p_mer,i_mer, sysfile1, sysfile2, sysfile3, sysfile4, sysfile5 )
 print 'end kp graphtrans',time.time()
 sysfile3=open(sysfile3, 'r' )
 tempf=sysfile3.read()
 tempf3=tempf.split(',')
 tempf3.pop()
 sysfile3.close()
 sysfile4=open(sysfile4, 'r' )
 tempf=sysfile4.read()
 tempf4=tempf.split(',')
 tempf4.pop()
 sysfile4.close()
 sysfile5=open(sysfile5, 'r' )
 tempf=sysfile5.read()
 tempf5=tempf.split(',')
 tempf5.pop()
 sysfile5.close()
 pool=tuple((contigpool[int(tempf3[i])],int(tempf4[i])) for i in range(len(tempf3)) )
 [setattr(nodepool[i],'contiginfo',pool[int(tempf5[i]):int(tempf5[i+1])]) for i in range(len(tempf5)-1) ]
 return nodepool


def alignkpgraphpc( contigpool, nodepool ):
 sysfile1 = 'systemfilec1'+outname
 sysfile2 = 'systemfilec2'+outname
 sysfile3 = 'systemfilec3'+outname
 sysfile4 = 'systemfilec4'+outname
 sysfile5 = 'systemfilec5'+outname
 sysfile6 = 'systemfilec6'+outname
 sfile=open(sysfile1, 'w+' )
 if len(contigpool):
  sfile.write('\n'.join([temp.sequence for temp in contigpool])+'\n')
 sfile.close()
 sfile=open(sysfile2, 'w+' )
 if len(nodepool):
  sfile.write('\n'.join([temp.sequence for temp in nodepool])+'\n')
 sfile.close()
 print 'start kp graph',time.time()
 kpgraph.alignkpgraphc(k_mer,p_mer,i_mer, sysfile1, sysfile2, sysfile3, sysfile4, sysfile5, sysfile6 )
 print 'end kp graph',time.time()
 sysfile3=open(sysfile3, 'r' )
 tempf=sysfile3.read()
 tempf3=tempf.split(',')
 tempf3.pop()
 sysfile3.close()
 sysfile4=open(sysfile4, 'r' )
 tempf=sysfile4.read()
 tempf4=tempf.split(',')
 tempf4.pop()
 sysfile4.close()
 sysfile5=open(sysfile5, 'r' )
 tempf=sysfile5.read()
 tempf5=tempf.split(',')
 tempf5.pop()
 sysfile5.close()
 sysfile6=open(sysfile6, 'r' )
 tempf=sysfile6.read()
 tempf6=tempf.split(',')
 tempf6.pop()
 sysfile6.close()
 pool=tuple((contigpool[int(tempf3[i])],int(tempf4[i]),int(tempf5[i])) for i in range(len(tempf3)) )
 [setattr(nodepool[i],'contiginfo',pool[int(tempf6[i]):int(tempf6[i+1])]) for i in range(len(tempf6)-1) ]
 return nodepool
# def rsetattr(a,b):
#  setattr(a,'kmerlist',b)
#  return a
# pool=[rsetattr(pool[int(tempf2[i])],int(tempf3[i])) for i in range(len(kmerpool)) ]
# return [node(pool[int(tempf4[i]):int(tempf4[i+1])]) for i in range(len(tempf4)-1) ]

def alignkpgraphtranspc( contigpool, nodepool ):
 sysfile1 = 'systemfilec1'+outname
 sysfile2 = 'systemfilec2'+outname
 sysfile3 = 'systemfilec3'+outname
 sysfile4 = 'systemfilec4'+outname
 sysfile5 = 'systemfilec5'+outname
 sfile=open(sysfile1, 'w+' )
 if len(contigpool):
  sfile.write('\n'.join([temp.sequence for temp in contigpool])+'\n')
 sfile.close()
 sfile=open(sysfile2, 'w+' )
 if len(nodepool):
  sfile.write('\n'.join([temp.sequence for temp in nodepool])+'\n')
 sfile.close()
 print 'start kp graphtrans',time.time()
 kpgraph.alignkpgraphtransc(k_mer,p_mer,i_mer, sysfile1, sysfile2, sysfile3, sysfile4, sysfile5 )
 print 'end kp graphtrans',time.time()
 sysfile3=open(sysfile3, 'r' )
 tempf=sysfile3.read()
 tempf3=tempf.split(',')
 tempf3.pop()
 sysfile3.close()
 sysfile4=open(sysfile4, 'r' )
 tempf=sysfile4.read()
 tempf4=tempf.split(',')
 tempf4.pop()
 sysfile4.close()
 sysfile5=open(sysfile5, 'r' )
 tempf=sysfile5.read()
 tempf5=tempf.split(',')
 tempf5.pop()
 sysfile5.close()
 pool=tuple((contigpool[int(tempf3[i])],int(tempf4[i])) for i in range(len(tempf3)) )
 [setattr(nodepool[i],'contiginfo',pool[int(tempf5[i]):int(tempf5[i+1])]) for i in range(len(tempf5)-1) ]
 return nodepool

def mapsmrna(contigpool):
 """Pair contigs whose ends match each other and report the pairs.

 Steps, as far as this function shows:
  * sort contigs by ascending weight, number them, stash each contig's
    full sequence in its `reverse` attribute and pass the list through
    refreshcontig();
  * for match lengths 19..22 and mismatch counts 0..t_number, set the
    module globals k_mer / p_mer / i_mer, run mapkpgraphtranspc and
    alignkpgraphtranspc, and keep only the nodes that got no hit for the
    next round;
  * restore the sequences, write the contigs with writecfafile and the
    pair records to outname+' smrna result.mfa', and print the totals.

 Overwrites the module globals k_mer, p_mer and i_mer.  Returns None.
 NOTE(review): sort(cmp) and the integer `/` below are Python 2 only.
 """
 gv= globals()
 def cmp( a,b):
  # Ascending by weight; never returns 0, so ties count as "a after b".
  if b.weight > a.weight:
   return -1
  else:
   return 1
 def nsetattr(a,b):
  # The node's weight is replaced by the length of the stashed sequence.
  a.weight=len(b.reverse)
  return a
 contigpool.sort(cmp)
 [setattr(contigpool[i],'id',i) for i in range(len(contigpool))]
 # `reverse` is used from here on as a stash for the untouched sequence.
 [setattr(contigtemp,'reverse',contigtemp.sequence) for contigtemp in contigpool]
 contigpool=refreshcontig(contigpool)
 totalcontigpool=contigpool[:]
 nodepool= [nsetattr(contigtemp.nodelist[0],contigtemp) for contigtemp in contigpool]
 totalnodepool=nodepool[:]
# contigpool=[contigtemp for contigtemp in contigpool if contigtemp.weight >= i_number ]
 print 'total contigpool, contigpool', len(totalcontigpool), len(contigpool)
# def rsetattr(a,b):
#  if len(a.sequence)==0:
#   a.sequence=b
#  a.sequence=a.getreverse()
#  return a
# nodepool= [rsetattr(contigtemp.nodelist[i],contigtemp.sequence[i:i+k_mer]) for contigtemp in contigpool for i in range(len(contigtemp.nodelist)) ]
 def rsetattr(a,b):
  # Restore the full sequence, give node a the last k_mer characters of
  # b.getreverse(), then cut the contig down to its first k_mer characters.
  b.sequence=b.reverse
  a.sequence=b.getreverse()[-k_mer:]
  b.sequence=b.sequence[:k_mer]
 # range(19,24-1,1) yields the match lengths 19, 20, 21, 22.
 for matchlength in range(19,24-1,1):
  gv['k_mer']=matchlength
  [rsetattr(contigtemp.nodelist[0],contigtemp) for contigtemp in totalcontigpool]
  lengthconstrain=matchlength+2
  nodepool=[nodetemp for nodetemp in nodepool if nodetemp.weight >= lengthconstrain ]
  contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.reverse) >= lengthconstrain ]
  for ppp in range(t_number+1):
   gv['p_mer']=ppp
   gv['i_mer']=k_mer/(ppp+1)
   print 'k, p, i mer =',k_mer,p_mer,i_mer
   nodepool= mapkpgraphtranspc(contigpool,nodepool)
   # Nodes that found a partner drop out of the following rounds.
   nodepool=[nodetemp for nodetemp in nodepool if not len(nodetemp.contiginfo) ]
   print 'total nodepool, nodepool', len(totalnodepool), len(nodepool)
   print 'k, p, i mer =',k_mer,p_mer,i_mer
   nodepool= alignkpgraphtranspc(contigpool,nodepool)
   nodepool=[nodetemp for nodetemp in nodepool if not len(nodetemp.contiginfo) ]
   print 'total nodepool, nodepool', len(totalnodepool), len(nodepool)
 print 'total contigpool, contigpool', len(totalcontigpool), len(contigpool)
 multipair=0
 multiweight=0
 simplepair=0
 simpleweight=0
 contigpair=0
 contigweight=0
 nodepool=totalnodepool
 contigpool=totalcontigpool
 # Put the stashed full sequences back before writing anything.
 [setattr(contigtemp,'sequence',contigtemp.reverse) for contigtemp in contigpool]
 writecfafile(outname+' contig.fa', contigpool)
 mfa=outname+' smrna result.mfa'
 mfa=open(mfa, 'w+')
# writemfafile(mfa,contigpool)
 # nodepool[i] is the first node of contigpool[i] (both lists were built in parallel).
 # NOTE(review): indexing by contig id below assumes the ids still equal the
 # list positions after refreshcontig() - confirm.
 for i in range(len(contigpool)):
  contigtemp=contigpool[i]
  tempresult=nodepool[i].contiginfo
  if len(tempresult):
   contigpair +=1
   contigweight+=contigtemp.weight
   singlepair=False
   if len(tempresult)==1:
    singlepair=True
   for contigresult in tempresult:
    contigresult=contigresult[0]
    # Each unordered pair is reported once, from its lower-id member.
    if contigresult.id > i:
     mfa.write('>'+str(contigtemp.id)+','+str(contigresult.id)+','+str(len(contigtemp.sequence))+','+str(len(contigresult.sequence))+','+str(contigtemp.weight)+','+str(contigresult.weight)+'\n'+str(contigtemp.sequence)+'\n'+str(contigresult.sequence)+'\n')
     multipair+=1
     multiweight=multiweight+contigtemp.weight+contigresult.weight
     temp=nodepool[contigresult.id].contiginfo
     # "Simple" pair: each side has exactly one hit and they name each other.
     if singlepair and len(temp)==1 and temp[0][0].id == i:
      simplepair+=1
      simpleweight=simpleweight+contigtemp.weight+contigresult.weight
 mfa.close()
 print 'multipair :', multipair
 print 'multiweight :', multiweight
 print 'simplepair :', simplepair
 print 'simpleweight :', simpleweight
 print 'contigpair :', contigpair
 print 'contigweight :', contigweight
 return

def writemfafile( infile, contigpool):
 """Write the contigs that carry mapping hits to the file named `infile`.

 infile      path of the output file (created or truncated)
 contigpool  list of contigs; each one gets its list position as `id`

 An empty pool produces the single line '>'.  Otherwise every contig with
 at least one non-empty node.contiginfo contributes:
  * a header line '>id I length L weight W pre <id,dir>... next <id,dir>...'
  * one line with a 'j,nodeweight,hitid,hitindex,hitnodeweight ' item per hit
  * the sequence, wrapped at 2000 characters per line.

 The file is handled with `with`, so it is closed even when writing fails.
 """
##lc: line constrain
 lc=2000
 with open(infile,'w+') as outfile:
  cl=len(contigpool)
  if cl ==0:
   outfile.write('>\n')
   return
  for i in range(cl):
   contigpool[i].id=i
  for ci in contigpool:
   nodelist=ci.nodelist
   if not any([len(temp.contiginfo) for temp in nodelist ]):
    continue
   # Pieces are collected in lists and joined once instead of growing a
   # string with repeated concatenation.
   hits=[]
   for j in range(len(nodelist)):
    nodej=nodelist[j]
    for result in nodej.contiginfo:
     hits.append(str(j)+','+str(nodej.weight)+','+str(result[0].id)+','+str(result[1])+','+str(result[0].nodelist[result[1]].weight)+' ')
   s=ci.sequence
   ls=len(s)
   header=['>id '+str(ci.id)+' length '+str(ls)+' weight '+str(ci.weight)]
   pre=ci.pre
   predirection=ci.predirection
   header.append(' pre')
   for j in range(len(pre)):
    header.append(' '+str(pre[j].id)+','+str(predirection[j]))
   next=ci.next
   nextdirection=ci.nextdirection
   header.append(' next')
   for j in range(len(next)):
    header.append(' '+str(next[j].id)+','+str(nextdirection[j]))
   outfile.write(''.join(header)+'\n')
   outfile.write(''.join(hits)+'\n')
   if ls<=lc:
    outfile.write(str(s)+'\n')
   else:
    # Consecutive lc-sized slices; the last one holds the remainder.
    sl=[s[j:j+lc] for j in range(0,ls,lc)]
    outfile.write(str('\n'.join(sl))+'\n')

def buildkpgraphpc( kmerpool ):
 sysfile1 = 'systemfilec1'+outname
 sysfile2 = 'systemfilec2'+outname
 sysfile3 = 'systemfilec3'+outname
 sysfile4 = 'systemfilec4'+outname
 sfile=open(sysfile1, 'w+' )
 if len(kmerpool):
  sfile.write('\n'.join([temp.getsequence() for temp in kmerpool])+'\n')
 sfile.close()
 print 'start kp graph',time.time()
 kpgraph.buildkpgraphc(k_mer,p_mer,i_mer, sysfile1, sysfile2, sysfile3, sysfile4 )
 print 'end kp graph',time.time()
 sysfile2=open(sysfile2, 'r' )
 tempf=sysfile2.read()
 tempf2=tempf.split(',')
 tempf2.pop()
 sysfile2.close()
 sysfile3=open(sysfile3, 'r' )
 tempf=sysfile3.read()
 tempf3=tempf.split(',')
 tempf3.pop()
 sysfile3.close()
 sysfile4=open(sysfile4, 'r' )
 tempf=sysfile4.read()
 tempf4=tempf.split(',')
 tempf4.pop()
 sysfile4.close()
 def rsetattr(a,b):
  setattr(a,'direction',b)
  return a

 pool=[rsetattr(kmerpool[int(tempf2[i])],int(tempf3[i])) for i in range(len(kmerpool)) ]
 return [node(pool[int(tempf4[i]):int(tempf4[i+1])]) for i in range(len(tempf4)-1) ]
# pool=kmerpool[:]
# for i in range(len(kmerpool)):
#  pool[i]=kmerpool[int(tempf2[i])]
#  pool[i].direction=int(tempf3[i])
# j=int(tempf4[0])
# nodepool=tempf4[1:]
# for i in range(len(tempf4)-1):
#  k=int(tempf4[i+1])
#  nodepool[i]=node(pool[j:k])
#  j=k
# return nodepool

def buildkpgraphtranspc( kmerpool ):
 sysfile1 = 'systemfilec1'+outname
 sysfile2 = 'systemfilec2'+outname
 sysfile3 = 'systemfilec3'+outname
 sfile=open(sysfile1, 'w+' )
 if len(kmerpool):
  sfile.write('\n'.join([temp.sequence() for temp in kmerpool])+'\n')
 sfile.close()
 print 'start kp graph',time.time()
 kpgraph.buildkpgraphtransc(k_mer,p_mer,i_mer, sysfile1, sysfile2, sysfile3 )
 print 'end kp graph',time.time()
 sysfile2=open(sysfile2, 'r' )
 tempf=sysfile2.read()
 tempf2=tempf.split(',')
 tempf2.pop()
 sysfile2.close()
 sysfile3=open(sysfile3, 'r' )
 tempf=sysfile3.read()
 tempf3=tempf.split(',')
 tempf3.pop()
 sysfile3.close()
 pool=[kmerpool[int(temp)] for temp in tempf2 ]
 return [node(pool[int(tempf3[i]):int(tempf3[i+1])]) for i in range(len(tempf3)-1) ]

def buildkpgraph( kmerpool ):
 """Group k-mers into nodes by seed lookup followed by a kpgraph.indistance check.

 Seed i of a k-mer is the slice [i*i_mer:(i+1)*i_mer] of its sequence with
 the slice number appended as text, for i in 0..p_mer.

 1. Count all seeds and lay the counts out with fdg(): dg[seed] is the
    bucket base, dga[base] the bucket's fill cursor / limit, and the slots
    after it hold the k-mers sharing that seed.
 2. For every k-mer still marked fresh, start a new node and grow it
    breadth-first: the buckets of the queued k-mer's forward seeds and of
    the seeds cut from its reverse() are scanned, candidates accepted by
    kpgraph.indistance(..., p_mer) join the node, and each bucket is
    compacted (or its dg entry removed once it is empty).

 Also re-allocates the shared per-base buffers in the global `dlist` when
 they are shorter than k_mer.  Sets kmer.fresh and kmer.direction as a
 side effect.  Returns the list of nodes.
 """
 if len(dlist['A']) < k_mer:
  for d in itd:
   dlist[d]=[0]*k_mer
 # iti: slice numbers as strings; basei/limiti: slice start and end offsets.
 iti=tuple([str(i) for i in range(p_mer+1)])
 basei=tuple([i for i in range(0,k_mer+1,i_mer)])
 limiti= basei[1:]
# print 'iti',iti
# print 'basei',basei
# print 'limiti', limiti
# print 'k_mer :',k_mer
# print 'p_mer :',p_mer
# print 'i_mer :',i_mer
 # kpl is not used below.
 kpl=len(kmerpool)
#build readpool kmerpool dg
 print 'start kmer dg:',time.time()
 ksl=range(p_mer+1)
# ks2=ksl*2
 dg={}
 # First pass: count how many k-mers carry each seed.
 for kmertemp in kmerpool:
  kmertemp.fresh =True
  kmertemp.direction =True
  kmersequence=kmertemp.sequence()
  for i in ksl:
   seedsequence = kmersequence[basei[i]:limiti[i]]+iti[i]
   if seedsequence in dg:
    dg[seedsequence]+=1
   else:
    dg[seedsequence] =1
 (dg,dga)=fdg(dg)
 # Second pass: drop each k-mer into its seed buckets; dga[dgs] is the
 # bucket's next free slot and ends up as the bucket limit.
 for kmertemp in kmerpool:
  kmersequence=kmertemp.sequence()
  for i in ksl:
   seedsequence = kmersequence[basei[i]:limiti[i]]+iti[i]
   dgs=dg[seedsequence]
   dga[dga[dgs]]=kmertemp
   dga[dgs]+=1
# build nodepool
 print 'start link kp graph and build node:',time.time()
 nodepool=[]
 pool=[]
# p =p_mer+1
 for kmertemp in kmerpool:
  if not (kmertemp.fresh):
   continue
  kmertemp.fresh =False
  pool.append(kmertemp)
  nodetemp= node()
  nodetemp.addkmer(kmertemp)
  nodepool.append(nodetemp)
  while len(pool):
   kmertemp = pool.pop(0)
   kmerdirection=kmertemp.direction
   kmernotdirection= not kmerdirection
   kmersequence=kmertemp.sequence()
   kmerreverse=kmertemp.reverse()
   # Forward seeds: candidates are compared by their forward sequence and
   # inherit the direction of the k-mer that found them.
   for i in ksl:
    seedsequence = kmersequence[basei[i]:limiti[i]]+iti[i]
    if seedsequence not in dg:
     continue
    dgs=dg[seedsequence] 
    # Copy of the bucket: element 0 is the limit, the rest are k-mers.
    seedpool=dga[dgs:dga[dgs]]
    seedzero=seedpool[0]
    # Walk backwards (down to index 1) so pops do not disturb pending indices.
    for j in range(len(seedpool)-1,0,-1):
     temp=seedpool[j]
     if not(temp.fresh):
      seedpool.pop(j)
      seedzero -=1
      continue
     if kpgraph.indistance(kmersequence,temp.sequence(),p_mer):
      temp.fresh=False
      temp.direction=kmerdirection
      pool.append(temp)
      nodetemp.addkmer(temp)
      seedpool.pop(j)
      seedzero -=1
    # Limit right after the header slot means the bucket is empty.
    if seedzero == dgs+1:
     del dg[seedsequence]
    else:
     seedpool[0]=seedzero
     dga[dgs:seedzero]=seedpool
   # Seeds cut from the reverse sequence: candidates are compared by their
   # reverse() and receive the opposite direction.
   for i in ksl:
    seedsequence = kmerreverse[basei[i]:limiti[i]]+iti[i]
    if seedsequence not in dg:
     continue
    dgs=dg[seedsequence] 
    seedpool=dga[dgs:dga[dgs]]
    seedzero=seedpool[0]
    for j in range(len(seedpool)-1,0,-1):
     temp=seedpool[j]
     if not(temp.fresh):
      seedpool.pop(j)
      seedzero -=1
      continue
     if kpgraph.indistance(kmersequence,temp.reverse(),p_mer):
      temp.fresh=False
      temp.direction=kmernotdirection
      pool.append(temp)
      nodetemp.addkmer(temp)
      seedpool.pop(j)
      seedzero -=1
    if seedzero == dgs+1:
     del dg[seedsequence]
    else:
     seedpool[0]=seedzero
     dga[dgs:seedzero]=seedpool
 del dga
 del dg
 del pool
 return nodepool

def buildkpgraph2( kmerpool ):
 """Group k-mers into nodes using per-seed Python lists instead of a flat bucket array.

 1. dg maps every forward seed (getseedsequencelist(True)) to the list of
    k-mers that carry it.
 2. For every k-mer still marked fresh, start a new node and grow it
    breadth-first: the queued k-mer's forward seeds followed by its reverse
    seeds (getseedsequencelist(False)) are looked up, stale entries are
    dropped from the seed lists, and candidates accepted by
    kpgraph.indistance(..., p_mer) join the node and are queued.

 Sets kmer.fresh and kmer.direction as a side effect.  Returns the list of
 nodes.
 """
 print 'k_mer :',k_mer
 print 'p_mer :',p_mer
 print 'i_mer :',i_mer
 kpl=len(kmerpool)
#build readpool kmerpool dg
 print 'start kmer dg:',time.time()
 # ksl seeds per strand; ks2 covers the forward and the reverse seed lists.
 ksl=(p_mer+1)
 ks2=ksl*2
 dg={}
 for j in range(kpl):
  kmertemp =kmerpool[j]
  kmertemp.fresh =True
  seedsequencelist =kmertemp.getseedsequencelist(True)
  for k in range(ksl):
   seedsequence=seedsequencelist[k]
   dgs=dg.get(seedsequence)
   if dgs:
    dgs.append(kmertemp)
   else:
    dg[seedsequence] =[kmertemp]
# build nodepool
 print 'start link kp graph and build node:',time.time()
 nodepool=[]
 pool=[]
# p =p_mer+1
 for i in range(len(kmerpool)):
  kmertemp=kmerpool[i]
  if not kmertemp.fresh:
   continue
  kmertemp.fresh=False
  pool.append(kmertemp)
  nodetemp= node()
  nodetemp.addkmer(kmertemp)
  nodepool.append(nodetemp)
  # Per-node cache of the seed lists already fetched from dg.
  reqpool={}
  while len(pool):
   kmertemp = pool.pop(0)
   sequence=kmertemp.sequence()
   seedlist =kmertemp.getseedsequencelist(True) + kmertemp.getseedsequencelist(False)
   for r in range(ks2):
    seedr=seedlist[r]
    seedpool= reqpool.get(seedr)
    if seedpool is None:
     seedpool=dg.get(seedr)
     # An empty or missing list is skipped (and not cached).
     if seedpool:
      reqpool[seedr] =seedpool
     else:
      continue
    # The second half of seedlist holds the reverse seeds; rd tells which
    # strand of the candidate is compared.
    if r >=ksl:
     r =r-ksl
     rd =False
    else:
     rd =True
    # seedpool is the list stored in dg, so pops here shrink it for everyone.
    for j in range(len(seedpool)-1,-1,-1):
     if not seedpool[j].fresh:
      seedpool.pop(j)
      continue
     else:
      temp=seedpool[j]
      if rd:
       if kpgraph.indistance(sequence,temp.sequence(),p_mer):
        temp.direction=kmertemp.direction
        temp.fresh=False
        pool.append(temp)
        nodetemp.addkmer(temp)
        seedpool.pop(j)
      else:
       if kpgraph.indistance(sequence,temp.reverse(),p_mer):
        temp.direction=not kmertemp.direction
        temp.fresh=False
        pool.append(temp)
        nodetemp.addkmer(temp)
        seedpool.pop(j)
 del dg
 del kmerpool
 del pool
 return nodepool

def createread(readlist):
 """Wrap every element of ``readlist`` in a ``read`` object.

 ``readlist`` is any iterable of raw reads; the result is a list of
 ``read`` instances in the same order (empty for empty input).
 """
 # iterate the elements themselves; range() over the list object is a TypeError
 return [read(rawread) for rawread in readlist]

def setsolidrdna():
 """Make the global ``rdna`` table map A, T, C and G to themselves.

 Called by ``inputreadlist`` for csfa/csfasta input.
 """
 for base in ('A','T','C','G'):
  rdna[base]=base

def setsolexardna():
 """Make the global ``rdna`` table map each base to its complement.

 Called by ``inputreadlist`` for fasta and fastq input.
 """
 rdna.update({'A':'T', 'T':'A', 'C':'G', 'G':'C'})

def inputsingleread(infile1, i_n=i_number, t_n=t_number):
 """Load a single (unpaired) read file; same as ``inputread`` without a mate file."""
 return inputread(infile1, infile2=None, i_n=i_n, t_n=t_n)

def inputreadlist(infile1,infile2=None, i_n=i_number, t_n=t_number):
 form=infile1.split('.')
 form=form[len(form)-1]
 if form== 'fa' or form == 'fasta':
  setsolexardna()
  if infile2:
   readlist= readfapair(infile1,infile2, i_n, t_n)
  else:
   readlist= readfafile(infile1, i_n, t_n)
 elif form== 'fq' or form == 'fatq' or form == 'fastq':
  setsolexardna()
  if infile2:
   readlist= readfqpair(infile1,infile2, i_n, t_n)
  else:
   readlist= readfqfile(infile1, i_n, t_n)
 elif form== 'csfa' or form == 'csfasta':
  setsolidrdna()
  if infile2:
   readlist= readcsfapair(infile1,infile2, i_n, t_n)
  else:
   readlist= readcsfafile(infile1, i_n, t_n)
 else:
  print 'file format can not read:' + infile1
  sys.exit(1)
 return readlist

def inputread(infile1,infile2=None, i_n=i_number, t_n=t_number):
 """Load the input file(s) and wrap every raw read in a ``read`` object."""
 return [read(rawread) for rawread in inputreadlist(infile1,infile2, i_n, t_n)]

def inputsread(infile1,infile2=None, i_n=i_number, t_n=t_number):
 """Load the input file(s) and wrap every raw read in an ``sread`` object."""
 return [sread(rawread) for rawread in inputreadlist(infile1,infile2, i_n, t_n)]

def fpreprocessingcontig(nodepool):
 print 'split node:', time.time()
 pool=[]
 for i in range(len(nodepool)):
  pool.extend(nodepool[i].splitnode())
 nodepool=pool
 nl=len(nodepool)
 print 'initial contig:', time.time()
# [nodepool[i].reducekmer() for i in range(nl)]
 return [ contig(nodepool[i]) for i in range(nl) ]

def extractnode(readtemp, i):
 """Build an ``snode`` from one end of ``readtemp.sequence``.

 A falsy ``i`` selects the first ``k_mer`` bases, a truthy ``i`` the last
 ``k_mer`` bases.
 """
 sequence=readtemp.sequence
 if not i:
  return snode(sequence[:k_mer])
 return snode(sequence[-k_mer:])

def extractread(readtemp,elength, cov):
 """Cut ``readtemp.sequence`` into pseudo reads of exactly ``elength`` bases.

 ``cov`` tilings are produced; tiling ``i`` starts at offset
 ``i*(elength//cov)`` and advances in steps of ``elength``.  Only windows
 that fit completely inside the sequence are emitted, and one extra window
 covering the last ``elength`` bases is always appended.

 Returns a list of ``sread`` objects, or ``[]`` when the sequence is shorter
 than ``elength``.
 """
 sequence=readtemp.sequence
 lr=len(sequence)
 if lr< elength:
  return []
 step=elength//cov
 # Windows end at j+elength (not at the absolute position elength) and are
 # restricted to full-length ones: matefile() assumes the two ends of every
 # fragment are exactly elength+1-k_mer apart.
 laststart=lr-elength
 pool=[sread(sequence[j:j+elength]) for i in range(cov) for j in range(i*step,laststart+1,elength) ]
 pool.append(sread(sequence[-elength:]))
 return pool

def preprocessingmatepseudo(nodepool,readpool):
 m=0
 pool=[]
 pool2=[]
 for i in range(0,len(nodepool),2):
  n1=nodepool[i]
  l1=len(n1.contiginfo)
  n2=nodepool[i+1]
  l2=len(n2.contiginfo)
  if (l1==0) or (l2==0):
   continue
  elif (l1==1) and (l2==1):
   n1.contiginfo=n1.contiginfo[0]
   n2.contiginfo=n2.contiginfo[0]
   pool.append(n1)
   pool.append(n2)
   pool2.append(readpool[i/2])
  else:
   for j in range(l1):
    for k in range(l2):
     m+=2
     n3=snode(n1.sequence)
     n3.contiginfo=n1.contiginfo[j]
     n4=snode(n2.sequence)
     n4.contiginfo=n2.contiginfo[k]
     pool.append(n3)
     pool.append(n4)
     pool2.append(readpool[i/2])
 print 'multi number',m
 return (pool,pool2)

def preprocessingmatepair(nodepool):
 """Resolve the ``contiginfo`` hit lists of paired nodes into single hits.

 Nodes are taken two at a time.  A pair is dropped when either node has no
 hit.  When both have exactly one hit the nodes are kept and their
 ``contiginfo`` is replaced in place by that hit.  Otherwise one fresh
 ``snode`` pair is created for every combination of hits.

 Returns the flat list of resulting nodes (two per emitted pair).
 """
 resolved=[]
 for i in range(0,len(nodepool),2):
  first=nodepool[i]
  firstcount=len(first.contiginfo)
  second=nodepool[i+1]
  secondcount=len(second.contiginfo)
  if firstcount==0 or secondcount==0:
   continue
  if firstcount==1 and secondcount==1:
   first.contiginfo=first.contiginfo[0]
   second.contiginfo=second.contiginfo[0]
   resolved.append(first)
   resolved.append(second)
   continue
  for firsthit in first.contiginfo:
   for secondhit in second.contiginfo:
    firstcopy=snode(first.sequence)
    firstcopy.contiginfo=firsthit
    secondcopy=snode(second.sequence)
    secondcopy.contiginfo=secondhit
    resolved.append(firstcopy)
    resolved.append(secondcopy)
 return resolved

def killselfmap(contigpool):
 """Drop contigs shorter than 2000 bases that align inside another contig.

 Both ends of every short contig are mapped against ``contigpool``.  A short
 contig is discarded when both ends hit the same other contig in the same
 direction, exactly ``len(sequence)-k_mer`` apart, and
 ``kpgraph.aligndistance6`` accepts the full-length comparison.

 Side effect: ``fresh`` is rewritten on every contig of ``contigpool``.
 Returns the contigs that are still ``fresh``.
 """
 shortpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) < 2000]
 # two end nodes per short contig: start first, then end
 endpool=[]
 for shorttemp in shortpool:
  endpool.append(extractnode(shorttemp,0))
  endpool.append(extractnode(shorttemp,1))
 endpool=multimap( contigpool, endpool)
 (endpool,shortpool)=preprocessingmatepseudo(endpool,shortpool)
 for contigtemp in contigpool:
  contigtemp.fresh=True
 contigset= set(contigpool)
 for i in range(0,len(endpool),2):
  (c1,cp1,cd1)=endpool[i].contiginfo
  if c1 not in contigset:
   continue
  (c2,cp2,cd2)=endpool[i+1].contiginfo
  if c2 not in contigset:
   continue
  if not ((c1 == c2) and (cd1 == cd2)):
   continue
  candidate=shortpool[i//2]
  lrs=len(candidate.sequence)
  # the ends must span exactly the candidate, and a contig never kills itself
  if (abs(cp1-cp2) == (lrs -k_mer)) and (c1 is not candidate):
   budget=lrs/m_mer
   if cd1 and kpgraph.aligndistance6(c1.sequence[cp1:cp1+lrs],candidate.sequence, budget):
    candidate.fresh=False
   elif kpgraph.aligndistance6(c1.sequence[cp2:cp2+lrs],candidate.reverse, budget):
    candidate.fresh=False
 return [contigtemp for contigtemp in contigpool if contigtemp.fresh ]

def matefile(inf1,inf2,inf3):
 contigpool=[contig(snode(temp)) for temp in readfastafile(inf1) ]
 readpool=[sread(temp) for temp in readfastafile(inf2) ]
 print str(len(contigpool))+' contigs'
 contigpool = killselfmap(contigpool)
 print 'after kill self mapped contig: '+str(len(contigpool))+' contigs'
 maxlength=max([len(readtemp.sequence) for readtemp in readpool ])
 if t_number>0:
  extractlength=t_number
 else:
  extractlength=100
 cov=i_number
 while extractlength < maxlength:
  print 'maxlength extractlength coveragedeep numbercontig', maxlength, extractlength, cov, len(contigpool)
  readpooltemp=[]
  [readpooltemp.extend(extractread(readtemp, extractlength, cov )) for readtemp in readpool ]
  print str(len(readpooltemp))+' reads extracted'
  nodepool=[extractnode(readpooltemp[i/2],i%2) for i in range(2*len(readpooltemp)) ]
  nodepool=multimap( contigpool, nodepool)
  (nodepool,readpooltemp)=preprocessingmatepseudo(nodepool,readpooltemp)
  print 'after multi setting: nodepool', len(nodepool)
#  contigpool=matepseudo(contigpool,nodepool,readpooltemp,3000)
  contigpool=matepseudo(contigpool,nodepool,readpooltemp,extractlength+1-k_mer)
  extractlength+=extractlength
  cov += cov
 print str(len(contigpool))+' contigs'
 contigpool = killselfmap(contigpool)
 print 'after kill self mapped contig: '+str(len(contigpool))+' contigs'
 writecfafile(inf3,contigpool)
 return

def multimap( contigpool, nodepool):
 gv=globals()
 temp_p_mer=p_mer
 totalnodepool=nodepool[:]
 ltn=len(totalnodepool)
 round=1
# round=p_mer+1
# gv['p_mer']= 0
 for i in range(0,round,1):
  print 'total nodepool, nodepool', ltn, len(nodepool)
  print 'k, p, i mer =',k_mer,p_mer,i_mer
  nodepool= mapkpgraphpc(contigpool,nodepool)
#  if metric == 'approximate':
#   nodepool= mapkpgraphpc(contigpool,nodepool)
#  else:
#   nodepool= alignkpgraphpc(contigpool,nodepool)
  nodepool=[nodetemp for nodetemp in nodepool if not len(nodetemp.contiginfo) ]
  gv['p_mer']+= 1
 gv['p_mer']= temp_p_mer
 print 'total nodepool, nodepool', ltn, len(nodepool)
 return totalnodepool

def distancetrue(a,b,c):
 """Accept-everything distance predicate: ignores all arguments, returns True."""
 return True

def selfmate(infile1,infile2):
# inf1='Lepto_contigs.fa'
# inf2='454AllContigs.fna'
 inf1=infile1
 inf2=infile2
 gv=globals()
 round=(k_mer-m_mer)/ i_mer
 for i in range(round):
#  gv['p_mer']= (k_mer-i_mer)/i_mer
  gv['p_mer']= (k_mer/m_mer)
  inf3=inf1+'p'
  inf4=inf2+'p'
  print 'file a',inf1
  stasequence(readfastafile(inf1))
  print 'file b',inf2
  stasequence(readfastafile(inf2))
  matefile(inf1,inf2,inf3)
  print 'file c',inf3
  stasequence(readfastafile(inf3))
  print 'file a',inf2
  stasequence(readfastafile(inf2))
  print 'file b',inf1
  stasequence(readfastafile(inf1))
  matefile(inf2,inf1,inf4)
  print 'file c',inf4
  stasequence(readfastafile(inf4))
  inf1= inf3
  inf2= inf4
  gv['k_mer']-= i_mer
 return

def mapref(contigpool,refpool):
 contigpool=refreshcontig(contigpool)[:]
 nodepool=[extractnode(contigpool[i/2],i%2) for i in range(2*len(contigpool)) ]
 for reftemp in refpool:
  reftemp.nodelist=[ 0 for i in range(len(reftemp.sequence))]
 nodepool=multimap( refpool, nodepool)
 (nodepool,contigpool)=preprocessingmatepseudo(nodepool,contigpool)
 contigset= set(refpool)
 for i in range(0,len(nodepool),2):
  (c1,cp1,cd1)=nodepool[i].contiginfo
  if c1 not in contigset:
   continue
  (c2,cp2,cd2)=nodepool[i+1].contiginfo
  if c2 not in contigset:
   continue
#  if (c1 == c2) and (cd1 == cd2) and (abs(cp1-cp2) == (len(contigpool[i/2].sequence) -k_mer)):
  if (c1 == c2) and (cd1 == cd2):
#   lrs=len(contigpool[i/2].sequence)
   lrs=abs(cp1-cp2) + k_mer
   if cd1 and (cp2>cp1) and kpgraph.aligndistance6(c1.sequence[cp1:cp1+lrs],contigpool[i/2].sequence,int( lrs*(1.0-bubbleidentity))):
    contigpool[i/2].fresh=False
    print len(c1.nodelist),cp1,cp1+lrs
    for j in range(cp1,cp1+lrs,1):
     c1.nodelist[j]=1
   elif (cp1>cp2) and kpgraph.aligndistance6(c1.sequence[cp2:cp2+lrs],contigpool[i/2].reverse, int(lrs*(1.0-bubbleidentity))):
    contigpool[i/2].fresh=False
    print len(c1.nodelist),cp2,cp2+lrs
    for j in range(cp2,cp2+lrs,1):
     c1.nodelist[j]=1
 temppool=[contigtemp for contigtemp in contigpool if not contigtemp.fresh]
 print 'mapped contig:'
 stacontig(list(set(temppool)))
 totallength=sum([len(reftemp.sequence) for reftemp in refpool])
 totalmap=sum([sum(reftemp.nodelist) for reftemp in refpool])
 print 'coverage:',totalmap,'/',totallength,'=', float(totalmap)/float(totallength)
 return 

def selfmap(infile1,infile2):
 mapresult=[]
 print 'file a',infile1
 readpool=[contig(snode(temp)) for temp in readfastafile(infile1) ]
 [setattr(readpool[i],'id',i+1) for i in range(len(readpool)) ]
 stasequence([temp.sequence for temp in readpool ])
 nodepool=[extractnode(readpool[i/2],i%2) for i in range(2*len(readpool)) ]
 print 'file b',infile2
 contigpool=[contig(snode(temp)) for temp in readfastafile(infile2) ]
 [setattr(contigpool[i],'id',i+1) for i in range(len(contigpool)) ]
 stasequence([temp.sequence for temp in contigpool ])
 nodepool=multimap( contigpool, nodepool)
 (nodepool,readpool)=preprocessingmatepseudo(nodepool,readpool)
 contigset= set(contigpool)
 for i in range(0,len(nodepool),2):
  (c1,cp1,cd1)=nodepool[i].contiginfo
  if c1 not in contigset:
   continue
  (c2,cp2,cd2)=nodepool[i+1].contiginfo
  if c2 not in contigset:
   continue
  if (c1 == c2) and (cd1 == cd2) and (abs(cp1-cp2) == (len(readpool[i/2].sequence) -k_mer)):
   lrs=len(readpool[i/2].sequence)
   if cd1 and kpgraph.aligndistance6(c1.sequence[cp1:cp1+lrs],readpool[i/2].sequence, lrs/m_mer):
    mapresult.append((readpool[i/2].id,len(readpool[i/2].sequence),c1.id,len(c1.sequence),cp1+1,cp1+lrs))
   elif kpgraph.aligndistance6(c1.sequence[cp2:cp2+lrs],readpool[i/2].reverse, lrs/m_mer):
    mapresult.append((readpool[i/2].id,len(readpool[i/2].sequence),c1.id,len(c1.sequence),cp2+lrs,cp2+1))
 def cmp(a,b):
  if a[2] > b[2]:
   return 1
  if a[2] < b[2]:
   return -1
  if a[4] > b[4]:
   return 1
  if a[4] < b[4]:
   return -1
  return 0
 tempresult=mapresult[:]
 mapresult=['\t'.join([str(temptemp) for temptemp in temp]) for temp in mapresult ]
 if len(mapresult):
  mapresult='\t\n'.join(mapresult)+'\t\n'
 print 'map result'
 print mapresult
 outf=open(outname+'_map.rtf', 'w+')
 outf.write(mapresult)
 outf.close()
 mapresult=tempresult
 mapresult.sort(cmp)
 mapresult=['\t'.join([str(temptemp) for temptemp in temp]) for temp in mapresult ]
 if len(mapresult):
  mapresult='\t\n'.join(mapresult)+'\t\n'
 print 'sorted result'
 print mapresult
 outf=open(outname+'_sort.rtf', 'w+')
 outf.write(mapresult)
 outf.close()
 return

def denovoassembly(infile1,infile2 ):
 gv= globals()
 print 'initial phase:', time.time()
 readpool= inputread(infile1,infile2, i_number, t_number)
 rl=len(readpool)
 readlength=len(readpool[0].sequence)
# gv['kmerweightlist'] = getkmerweight(readlength)
 lkl=sum([readpool[i].kl for i in range(rl)])
 print 'phase1:', time.time()
 kmerpool=sbuildkpgraph( readpool )
 nl =len(kmerpool)
 gv['idealnumber']=int(math.ceil(float(lkl)/nl))
 print 'rl',rl
 print 'readlength',readlength
 print 'kmer pool number',lkl
 print 'nl',nl
 print 'idealnumber', idealnumber
 if p_mer==0:
  nodepool=[node([kmertemp]) for kmertemp in kmerpool]
  new=3
 else:
  print 'phase2:', time.time()
  kmerl=lkl
  lkl=len(kmerpool)
#  nodepool=buildkpgraph( kmerpool )
  nodepool=buildkpgraphpc( kmerpool )
  nl =len(nodepool)
# totallength = nl+( k_mer-1)
# totallength = float(totallength)/ (1.0 - repeatparameter)
# if totallength >=4000000.0:
#  totallength =4000000.0
# depth= float(rl* readlength)/ totallength
# kmerdepth =depth* (float(kl)/ float(readlength))
# idealnumber= int(kmerdepth* ( 1.0- ununiformparameter))
  gv['idealnumber']=int(math.ceil(float(kmerl)/nl))
  print 'kmer pool number',lkl
  print 'nl',nl
  print 'idealnumber', idealnumber
  print 'phase3:', time.time()
# new = 0,2,3,5
  new=5
 if new ==0:
  contigpool=fpreprocessingcontig(nodepool)
 if new ==1:
  contigpool= spreprocessingcontig(nodepool)
 if new ==2:
  contigpool=preprocessingcontig(nodepool)
 if new ==3:
  contigpool=tpreprocessingcontig(nodepool)
 if new ==4:
  contigpool=cpreprocessingcontig(nodepool)
 if new ==5:
  contigpool=npreprocessingcontig(nodepool)
 print 'number contig:'+str(len(contigpool))
 print 'phase4 readsetkmer:', time.time()
 nodepool =readsetkmer(readpool)
 del kmerpool
 print 'phase5 extendcontig:', time.time()
 contigpool = extendcontig(contigpool)
 print 'phase6 assemblecontig:', time.time()
# contigpool2 = copy.deepcopy( contigpool)
# contigpool=assemblecontig2(contigpool, readpool, readlength)
 contigpool=assemblecontig1(contigpool)
 print 'sixth contig set: materead'
 contigpool = materead(contigpool, nodepool, readpool)
 stacontig(contigpool)
 contigpool= postprocessingcontig(contigpool)


def fdenovoassemblytrans(infile1,infile2 ):
 gv= globals()
 print 'initial phase:', time.time()
 readpool= inputreadlist(infile1,infile2, i_number, t_number)
 rl=len(readpool)
 readlength=len(readpool[0])
# gv['kmerweightlist'] = getkmerweight(readlength)
 lkl=sum([len(readpool[i]) for i in range(rl)])-rl*(k_mer-1)
 print 'phase1 preprocessingcontigpc:', time.time()
 [contigpool,nodepool,matepool]= preprocessingcontigtranspc(readpool)
 nl =len(nodepool)
 gv['idealnumber']=int(math.ceil(float(lkl)/nl))
 print 'rl',rl
 print 'readlength',readlength
 print 'kmer pool number',lkl
 print 'nl',nl
 print 'idealnumber', idealnumber
 print 'number contig:'+str(len(contigpool))
 print 'phase5 extendcontig:', time.time()
 contigpool = extendcontigtrans(contigpool)
 print 'phase6 assemblecontig:', time.time()
# contigpool2 = copy.deepcopy( contigpool)
# contigpool=assemblecontig2(contigpool, readpool, readlength)
 contigpool=assemblecontigtrans(contigpool)
 print 'sixth contig set: materead'
 nodepool= matepool
 contigpool = materead(contigpool, nodepool, readpool)
 stacontig(contigpool)
 contigpool= postprocessingcontig(contigpool)

def getfragment(contigpool,nodepool,readpool):
 """Derive inner 'fragment' nodes and shortened reads from mate-pair data.

 ``nodepool`` is processed in groups of four ``(x, contig, position)``
 entries, i.e. two reads.  For each group the first read is cut to its
 first ``k_mer+1`` characters and the second to its last ``k_mer+1``.
 Entry ``i+1`` is replaced by the node next to entry ``i`` in the direction
 of the original entry ``i+1``; entry ``i+2`` is replaced by the node next
 to entry ``i+3`` in the direction of the original entry ``i+2``.  When any
 of the four contigs is not in ``contigpool``, or a neighbour cannot be
 determined unambiguously, both inner entries become ``(None,None,None)``.

 Returns ``[fragmentpool, fragmentread]``; the inputs are not modified.
 """
 contigset=set(contigpool)
 no=(None,None,None)
 failed=object()
 fragmentpool=nodepool[:]
 fragmentread=readpool[:]
 kpp=k_mer+1
 def crossneighbour(anchor,apos,other):
  # Node following position apos of anchor when moving towards the
  # different contig other; `failed` when other is not adjacent or the
  # link at the contig border is not unique.
  if other in anchor.next:
   if apos+1 != len(anchor.nodelist):
    return anchor.nodelist[apos+1]
   ambiguous=anchor.next.count(other) >1
  elif other in anchor.pre:
   if apos != 0:
    return anchor.nodelist[apos-1]
   ambiguous=anchor.pre.count(other) >1
  else:
   return failed
  if ambiguous:
   return failed
  # anchor ends here: step onto the facing end of other
  if anchor in other.pre:
   return other.nodelist[0]
  return other.nodelist[-1]
 for i in range(0,len(fragmentpool),4):
  leftread=i//2
  rightread=(i+2)//2
  fragmentread[leftread]=readpool[leftread][:kpp]
  fragmentread[rightread]=readpool[rightread][-kpp:]
  (temp,c1,cp1)=nodepool[i]
  (temp,c2,cp2)=nodepool[i+1]
  (temp,c3,cp3)=nodepool[i+2]
  (temp,c4,cp4)=nodepool[i+3]
  if c1 not in contigset or c2 not in contigset or c3 not in contigset or c4 not in contigset:
   fragmentpool[i+1]=no
   fragmentpool[i+2]=no
   continue
  # neighbour of the first node, towards the second
  if c1 != c2:
   inner=crossneighbour(c1,cp1,c2)
  elif cp1 < cp2:
   inner=c1.nodelist[cp1+1]
  else:
   inner=c1.nodelist[cp1-1]
  if inner is failed:
   fragmentpool[i+1]=no
   fragmentpool[i+2]=no
   continue
  fragmentpool[i+1]=inner
  # neighbour of the fourth node, towards the third
  if c3 != c4:
   inner=crossneighbour(c4,cp4,c3)
  elif cp3 > cp4:
   inner=c4.nodelist[cp4+1]
  else:
   inner=c4.nodelist[cp4-1]
  if inner is failed:
   fragmentpool[i+1]=no
   fragmentpool[i+2]=no
   continue
  fragmentpool[i+2]=inner
 return [fragmentpool,fragmentread]

def fdenovoassembly(infile1,infile2 ):
 """Run the main assembly and scaffolding pipeline.

 ``infile1`` is either one read file name or a list of them (one per
 library); ``infile2`` is None, or the matching mate file name / list.

 Outline, as visible in this function:
  1. fill in defaults for the globals ``k_mer``, ``i_mer`` and ``m_mer``;
  2. read all libraries and exit with status 1 if a read is shorter than
     ``k_mer``;
  3. build the initial contigs with ``preprocessingcontigpc`` and set the
     global ``idealnumber``;
  4. assemble the contigs, either before (``lateestimate``) or after the
     insert size estimation;
  5. when mate files are given, estimate or take over the insert sizes,
     run the scaffolding rounds and write ``outname+'_scaffold.fasta'``.

 The scaffold file is only written inside the ``if infile2:`` branch.
 Modifies several module globals (``k_mer``, ``i_mer``, ``m_mer``,
 ``idealnumber``, ``reference``, ``insertsize``, ``insertstd``,
 ``s_number``).  Returns None.
 """
 gv= globals()
 # a k_mer of 1 is raised to 2
 if k_mer == 1:
  gv['k_mer']=2
# if p_mer == 0:
#  gv['p_mer']=1
#  gv['splitparameter']=0.0
#  gv['idealerrorfraction']=0.0
 # defaults for parameters that were left unset
 if i_mer == None:
  gv['i_mer']=k_mer/(p_mer+1)
 if m_mer == None:
  gv['m_mer']=(k_mer/2)+1
 print 'initial phase:', time.time()
 # several libraries: keep the reads per library and also concatenated
 if isinstance(infile1,list):
  readpoollist=[]
  readpool=[]
  for i in range(len(infile1)):
   if infile2==None:
    readtemp= inputreadlist(infile1[i],None, i_number, t_number)
   else:
    readtemp= inputreadlist(infile1[i],infile2[i], i_number, t_number)
   readpoollist.append(readtemp)
   readpool+=readtemp
 else:
  readpool= inputreadlist(infile1,infile2, i_number, t_number)
 rl=len(readpool)
 readlength=len(readpool[0])
# gv['kmerweightlist'] = getkmerweight(readlength)
 # lkl: total number of k-mers over all reads; abort on a too short read
 lkl=0
 for i in range(rl):
  readlength=len(readpool[i])
  if readlength < k_mer:
   switch()
   print 'The -k is larger than the length of read, so retry with a smaller number.'
   switch()
   sys.exit(1)
  lkl+=readlength
 lkl= lkl-(rl*(k_mer-1))
 print 'phase1 preprocessingcontigpc:', time.time()
 [contigpool,nodepool,matepool]= preprocessingcontigpc(readpool)
 nl =len(nodepool)
 # rounded-up average number of read k-mers per node
 gv['idealnumber']=int(math.ceil(float(lkl)/nl))
 print 'rl',rl
 # note: readlength now holds the length of the last read checked above
 print 'readlength',readlength
 print 'kmer pool number',lkl
 print 'nl',nl
 print 'idealnumber', idealnumber
 print 'number contig:'+str(len(contigpool))
 print 'phase5 extendcontig:', time.time()
# contigpool = extendcontig(contigpool)
 switch()
 stapath(contigpool)
 switch()
 print 'phase6 assemblecontig:', time.time()
# contigpool2 = copy.deepcopy( contigpool)
# contigpool=assemblecontig2(contigpool, readpool, readlength)
 # from here on nodepool is the mate node list returned by preprocessing
 nodepool= matepool
 if reference:
  gv['reference']= [contig(snode(temp)) for temp in readfastafile(reference) ]
# contig assembly, variant run BEFORE the insert size estimation
# (same statements as the `if not lateestimate:` block further down)
 if lateestimate:
  if premate:
   print 'sixth contig set: materead',time.time()
   contigpool = materead(contigpool, nodepool, readpool)
   stacontig(contigpool)
   contigpool=cleancontig(contigpool)
   contigpool=extendcontig(contigpool)
  contigpool=assemblecontig1(contigpool,nodepool)
  if repeatparameter > 0 and prerep:
   print 'remove repeat:'
   contigpool=removerepeat(contigpool)
   stacontig(contigpool)
  print 'sixth contig set: materead',time.time()
  contigpool = materead(contigpool, nodepool, readpool)
  stacontig(contigpool)
  switch()
  print 'Iteratively relink graph with shorter k-mer.'
  stapath(contigpool)
  switch()
  contigpool=cleancontig(contigpool)
  contigpool=extendcontig(contigpool)
# split the mate node list back into one slice per library
# (two nodes per read)
 if isinstance(infile1,list):
  nodepoollist=[]
  rcount=0
  for i in range(len(infile1)):
   j=2*len(readpoollist[i])  
   nodepoollist.append(nodepool[rcount:rcount+j])
   rcount+=j
 if frag>0:
  fragmentpoollist=[]
  fragmentreadlist=[]
# estimate insertsize
 if infile2:
  if isinstance(infile1,list):
   if insertsize ==None:
    contigpool = ssetnodeinfo( contigpool)
    insertsizelist=[]
    switch()
    print 'Estimate insertsizes.'
    switch()
    for i in range(len(infile1)):
     insertsizelist.append(estimateinsertsize(contigpool,nodepoollist[i],readpoollist[i]))
     switch()
     print 'insertsize'+str(i+1)+':',insertsizelist[i]
     switch()
     # the first `frag` libraries additionally get fragment data
     if i < frag:
      [fragmentpool,fragmentread]=getfragment(contigpool,nodepoollist[i],readpoollist[i])
      fragmentpoollist.append(fragmentpool)
      fragmentreadlist.append(fragmentread)
    globals()['insertsize'] =insertsizelist
   else:
    contigpool = ssetnodeinfo( contigpool)
    insertsizelist=[]
    for i in range(len(infile1)):
     # the estimate is computed but its result is discarded here;
     # the user-given insert size is the one that is used
     estimateinsertsize(contigpool,nodepoollist[i],readpoollist[i])
     print 'Given insertsize=',str(insertsize[i])
     insertsizelist.append(insertsize[i])
     if i < frag:
      [fragmentpool,fragmentread]=getfragment(contigpool,nodepoollist[i],readpoollist[i])
      fragmentpoollist.append(fragmentpool)
      fragmentreadlist.append(fragmentread)
    globals()['insertsize'] =insertsizelist
   # default standard deviation: a fixed fraction of each insert size
   if insertstd is None:
    insertstdlist=[]
    for i in range(len(insertsize)):
     insertstdlist.append(insertsize[i]*insertstdfraction)
    globals()['insertstd'] =insertstdlist
  else:
   if insertsize ==None:
    contigpool = ssetnodeinfo( contigpool)
    switch()
    print 'Estimate insertsizes.'
    switch()
    globals()['insertsize'] =estimateinsertsize(contigpool,nodepool,readpool)
    switch()
    print 'insertsize:',insertsize
    switch()
    if frag:
     [fragmentpool,fragmentread]=getfragment(contigpool,nodepool,readpool)
     fragmentpoollist.append(fragmentpool)
     fragmentreadlist.append(fragmentread)
   else:
    contigpool = ssetnodeinfo( contigpool)
    # result discarded, see the multi-library case above
    estimateinsertsize(contigpool,nodepool,readpool)
    print 'Given insertsize=',str(insertsize)
    globals()['insertsize'] =insertsize
    if frag:
     [fragmentpool,fragmentread]=getfragment(contigpool,nodepool,readpool)
     fragmentpoollist.append(fragmentpool)
     fragmentreadlist.append(fragmentread)
   if insertstd ==None:
    globals()['insertstd'] =insertsize*insertstdfraction
# contig assembly, variant run AFTER the insert size estimation
# (same statements as the `if lateestimate:` block further up)
 if not lateestimate:
  if premate:
   print 'sixth contig set: materead',time.time()
   contigpool = materead(contigpool, nodepool, readpool)
   stacontig(contigpool)
   contigpool=cleancontig(contigpool)
   contigpool=extendcontig(contigpool)
  contigpool=assemblecontig1(contigpool,nodepool)
  if repeatparameter > 0 and prerep:
   print 'remove repeat:'
   contigpool=removerepeat(contigpool)
   stacontig(contigpool)
  print 'sixth contig set: materead',time.time()
  contigpool = materead(contigpool, nodepool, readpool)
  stacontig(contigpool)
  switch()
  print 'Iteratively relink graph with shorter k-mer.'
  stapath(contigpool)
  switch()
  contigpool=cleancontig(contigpool)
  contigpool=extendcontig(contigpool)
# repeat removal for runs without mate files, then scaffolding on the
# fragment data of the first `frag` libraries
 if repeatparameter > 0 and infile2 is None:
  print 'remove repeat:'
  contigpool=removerepeat(contigpool)
  stacontig(contigpool)
 if infile2:
  if frag:
   # s_number may be a per-library list; the global is switched to the
   # current library's value during the loop and restored afterwards
   if isinstance(s_number,list):
    temp_s_number=s_number[:] 
   else:
    temp_s_number=s_number
   # NOTE(review): insertsize/insertstd are indexed here; in the
   # single-library branch above they are set to plain numbers -- confirm
   # that frag is only used together with a list of libraries
   for i in range(0,frag,1):
    if isinstance(temp_s_number,list):
     gv['s_number']=temp_s_number[i]
    print 'matepair '+str(i)+'th round:'
    print 'scaffold set:',time.time()
    contigpool=matepair(contigpool,fragmentpoollist[i],fragmentreadlist[i],insertsize[i],insertstd[i])
    stacontig(contigpool)
    if reference:
     mapref(contigpool,reference)
    showscaffoldstructure(contigpool)
    contigpool=cleancontig(contigpool)
    contigpool=extendcontig(contigpool)
    print 'prograsive scaffold set:',time.time()
    contigpool=matepair_p(contigpool,fragmentpoollist[i],fragmentreadlist[i],insertsize[i],insertstd[i])
    showscaffoldstructure(contigpool)
    stacontig(contigpool)
    contigpool=cleancontig(contigpool)
    contigpool=extendcontig(contigpool)
    stacontig(contigpool)
    showcontigstructure(contigpool)
    switch()
    print 'Run a scaffolding on fragment reads.'
    print 'Scaffold set:',str(i+1)
    stacontig(contigpool)
    switch()
   if isinstance(temp_s_number,list):
    gv['s_number']=temp_s_number
 contigpool= postprocessingcontig(contigpool)
 # scaffolding with the mate-pair libraries
 if infile2:
  if isinstance(infile1,list):
   if isinstance(s_number,list):
    temp_s_number=s_number[:] 
   else:
    temp_s_number=s_number
   # libraries below index `frag` were handled in the fragment rounds above
   for i in range(frag,len(infile1),1):
    if isinstance(temp_s_number,list):
     gv['s_number']=temp_s_number[i]
    print 'matepair '+str(i)+'th round:'
    print 'scaffold set:',time.time()
    contigpool=matepair(contigpool,nodepoollist[i],readpoollist[i],insertsize[i],insertstd[i])
    stacontig(contigpool)
    if reference:
     mapref(contigpool,reference)
    showscaffoldstructure(contigpool)
    contigpool=cleancontig(contigpool)
    contigpool=extendcontig(contigpool)
#    if repeatparameter > 0:
#     print 'remove repeat:'
#     contigpool=removerepeat(contigpool)
#     stacontig(contigpool)
    print 'prograsive scaffold set:',time.time()
    contigpool=matepair_p(contigpool,nodepoollist[i],readpoollist[i],insertsize[i],insertstd[i])
    showscaffoldstructure(contigpool)
    stacontig(contigpool)
    contigpool=cleancontig(contigpool)
    contigpool=extendcontig(contigpool)
    stacontig(contigpool)
    showcontigstructure(contigpool)
#    if repeatparameter > 0:
#     print 'remove repeat:'
#     contigpool=removerepeat(contigpool)
#     stacontig(contigpool)
    switch()
    print 'Run a scaffolding.'
    print 'Scaffold set:',str(i+1)
    stacontig(contigpool)
    switch()
   if isinstance(temp_s_number,list):
    gv['s_number']=temp_s_number
  else:
   # single library: one scaffolding round plus one progressive round
   print 'scaffold set:',time.time()
   contigpool=matepair(contigpool,nodepool,readpool,insertsize,insertstd)
   stacontig(contigpool)
   if reference:
    mapref(contigpool,reference)
   showscaffoldstructure(contigpool)
   contigpool=cleancontig(contigpool)
   contigpool=extendcontig(contigpool)
#   if repeatparameter > 0:
#    print 'remove repeat:'
#    contigpool=removerepeat(contigpool)
#    stacontig(contigpool)
   print 'prograsive scaffold set:',time.time()
   contigpool=matepair_p(contigpool,nodepool,readpool,insertsize,insertstd)
   showscaffoldstructure(contigpool)
   stacontig(contigpool)
   contigpool=cleancontig(contigpool)
   contigpool=extendcontig(contigpool)
   stacontig(contigpool)
   showcontigstructure(contigpool)
   switch()
   print 'Run a scaffolding'
   print 'Scaffold set:'
   stacontig(contigpool)
   switch()
#   if repeatparameter > 0:
#    print 'remove repeat:'
#    contigpool=removerepeat(contigpool)
#    stacontig(contigpool)
  # final repeat removal on the scaffolds, then write the result
  if repeatparameter > 0:
   print 'remove repeat:'
   contigpool=removerepeat(contigpool)
   stacontig(contigpool)
   switch()
   print 'Final scaffold set:'
   stacontig(contigpool)
   switch()
  writecfafile(outname+'_scaffold.fasta', contigpool)
  print 'end:',time.time()
#  contigpool=cleancontig(contigpool)
#  contigpool=refreshcontig(contigpool)
#  contigpool= linkcontig(contigpool,1)
#  contigpool= mergecontig(contigpool,1)

def preprocessingcontigtranspc( readpool):
 secret='-'+str(k_mer)+'-'+str(p_mer)+'-'+str(splitparameter)+'-'+str(idealerrorfraction)+'-'+fixstring(infile1)+'-'+fixstring(infile2)+'-'+str(i_number)+'-'+str(t_number)
 sysfile1 = outname+'systemfilec1'+secret
 sysfile2 = outname+'systemfilec2'+secret
 sysfile3 = outname+'systemfilec3'+secret
 sysfile4 = outname+'systemfilec4'+secret
 sysfile5 = outname+'systemfilec5'+secret
 secret=[sysfile1,sysfile2,sysfile3,sysfile4,sysfile5]
 if os.path.isfile(secret[0]):
  waiting=1
  while (not os.path.isfile(secret[4]))and os.path.isfile(secret[0]):
   if waiting:
    waiting=0
    switch()
    print 'Waiting the preprocessing contig, if you don\'t want to wait, type:'
    print 'rm -f '+secret[0]
    switch()
   time.sleep(60)
 fast= all([os.path.isfile(secret[i]) for i in range(len(secret))])
 if not fast:
  sfile=open(sysfile1, 'w+' )
  if len(readpool):
   sfile.write('\n'.join([temp for temp in readpool])+'\n')
  sfile.close()
  print 'start preprocessingcontigc',time.time()
  kpgraph.preprocessingcontigtransc(k_mer,p_mer,i_mer, splitparameter, idealerrorfraction, sysfile1, sysfile2, sysfile3, sysfile4)
  print 'end preprocessingcontigc',time.time()
  sysfile5=open(sysfile5,'w')
  sysfile5.close()
  switch()
  print 'Finish the first step.'
  switch()
 sysfile2=open(sysfile2, 'r' )
 tempf=sysfile2.read()
 tempf2=tempf.split('\n')
 tempf2.pop()
 sysfile2.close()
 sysfile3=open(sysfile3, 'r' )
 tempf=sysfile3.read()
 tempf3=tempf.split(',')
 tempf3.pop()
 sysfile3.close()
 sysfile4=open(sysfile4, 'r' )
 tempf=sysfile4.read()
 tempf4=tempf.split(',')
 tempf4.pop()
 sysfile4.close()
 def rsetattr(a,b,c):
  setattr(a,b,c)
  return a
 nodepool=[[int(temp),None,None] for temp in tempf3 ]
# nodepool=[snode() for temp in tempf3 ]
# [setattr(nodepool[i],'weight',int(tempf3[i])) for i in range(len(tempf3)) ]
 matepool=[nodepool[int(temp)] for temp in tempf4 ]
 tl=0
 contigpool=[]
 for temp in tempf2:
  nl=len(temp) +1 -k_mer
  contigpool.append(scontig(nodepool[tl:tl+nl],temp))
  tl += nl
 if fast:
  switch()
  print 'This type of input had be done, so we could run faster.'
  print 'reads:',str(len(matepool)/2)
  print 'nodes:',str(len(nodepool))
  print 'paths:',str(len(contigpool))
  switch()
 return [contigpool,nodepool,matepool]


def preprocessingcontigpc( readpool):
 secret='-'+str(k_mer)+'-'+str(p_mer)+'-'+str(splitparameter)+'-'+str(idealerrorfraction)+'-'+fixstring(infile1)+'-'+fixstring(infile2)+'-'+str(i_number)+'-'+str(t_number)
 sysfile1 = outdir+'systemfilec1'+secret
 sysfile2 = outdir+'systemfilec2'+secret
 sysfile3 = outdir+'systemfilec3'+secret
 sysfile4 = outdir+'systemfilec4'+secret
 sysfile5 = outdir+'systemfilec5'+secret
 sysfile6 = outdir+'systemfilec6'+secret
 secret=[sysfile1,sysfile2,sysfile3,sysfile4,sysfile5,sysfile6]
 if os.path.isfile(secret[0]):
  waiting=1
  while (not os.path.isfile(secret[5]))and os.path.isfile(secret[0]):
   if waiting:
    waiting=0
    switch()
    print 'Waiting the preprocessing contig, if you don\'t want to wait, type:'
    print 'rm -f '+secret[0]
    switch()
   time.sleep(60)
 fast= all([os.path.isfile(secret[i]) for i in range(len(secret))])
 if not fast:
  sfile=open(sysfile1, 'w+' )
  if len(readpool):
   sfile.write('\n'.join([temp for temp in readpool])+'\n')
  sfile.close()
  print 'start preprocessingcontigc',time.time()
  kpgraph.preprocessingcontigc(k_mer,p_mer,i_mer, splitparameter, idealerrorfraction, sysfile1, sysfile2, sysfile3, sysfile4, sysfile5)
  print 'end preprocessingcontigc',time.time()
  sysfile6=open(sysfile6,'w')
  sysfile6.close()
  switch()
  print 'Finish the first step.'
  switch()
 sysfile2=open(sysfile2, 'r' )
 tempf=sysfile2.read()
 tempf2=tempf.split('\n')
 tempf2.pop()
 sysfile2.close()
 sysfile3=open(sysfile3, 'r' )
 tempf=sysfile3.read()
 tempf3=tempf.split(',')
 tempf3.pop()
 sysfile3.close()
 sysfile4=open(sysfile4, 'r' )
 tempf=sysfile4.read()
 tempf4=tempf.split(',')
 tempf4.pop()
 sysfile4.close()
 sysfile5=open(sysfile5, 'r' )
 tempf=sysfile5.read()
 tempf5=tempf.split('\n')
 tempf5.pop()
 sysfile5.close()
 def rsetattr(a,b,c):
  setattr(a,b,c)
  return a
 nodepool=[[int(temp),None,None] for temp in tempf3 ]
# nodepool=[snode() for temp in tempf3 ]
# [setattr(nodepool[i],'weight',int(tempf3[i])) for i in range(len(tempf3)) ]
 matepool=[nodepool[int(temp)] for temp in tempf4 ]
 tl=0
 contigpool=[]
 for temp in tempf2:
  nl=len(temp) +1 -k_mer
  contigpool.append(scontig(nodepool[tl:tl+nl],temp))
  tl += nl
 for i in range(len(tempf5)):
  tempf6=tempf5[i].split('\t')
  if len(tempf6[0]):
   tempf7=tempf6[0].split(',')
   tempf7.pop()
   for temp in tempf7:
    contigpool[i].pre.append(contigpool[int(temp)])
   tempf7=tempf6[1].split(',')
   tempf7.pop()
   for temp in tempf7:
    if temp == '1':
     contigpool[i].predirection.append(True)
    else:
     contigpool[i].predirection.append(False)
  if len(tempf6[2]):
   tempf7=tempf6[2].split(',')
   tempf7.pop()
   for temp in tempf7:
    contigpool[i].next.append(contigpool[int(temp)])
   tempf7=tempf6[3].split(',')
   tempf7.pop()
   for temp in tempf7:
    if temp == '1':
     contigpool[i].nextdirection.append(True)
    else:
     contigpool[i].nextdirection.append(False)
 if fast:
  switch()
  print 'This type of input had be done, so we could run faster.'
  print 'reads:',str(len(matepool)/2)
  switch()
 return [contigpool,nodepool,matepool]


def denovoassemblytrans(infile1,infile2 ):
 gv= globals()
 print 'initial phase:', time.time()
 readpool= inputread(infile1,infile2, i_number, t_number)
 rl=len(readpool)
 readlength=len(readpool[0].sequence)
# gv['kmerweightlist'] = getkmerweight(readlength)
 lkl=sum([readpool[i].kl for i in range(rl)])
 print 'phase1:', time.time()
 kmerpool=sbuildkpgraphtrans( readpool )
 nl =len(kmerpool)
 gv['idealnumber']=int(math.ceil(float(lkl)/nl))
 print 'rl',rl
 print 'readlength',readlength
 print 'kmer pool number',lkl
 print 'nl',nl
 print 'idealnumber', idealnumber
 if p_mer==0:
  nodepool=[node([kmertemp]) for kmertemp in kmerpool]
  contigpool=[contig(nodetemp) for nodetemp in nodepool ]
 else:
  print 'phase2:', time.time()
  kmerl=lkl
  lkl=len(kmerpool)
  nodepool=buildkpgraphtranspc( kmerpool )
  nl =len(nodepool)
  gv['idealnumber']=int(math.ceil(float(kmerl)/nl))
  print 'kmer pool number',lkl
  print 'nl',nl
  print 'idealnumber', idealnumber
  print 'phase3:', time.time()
  contigpool=npreprocessingcontigtrans(nodepool)
 print 'number contig:'+str(len(contigpool))
 print 'phase4 readsetkmer:', time.time()
 nodepool =readsetkmer(readpool)
 del kmerpool
 print 'phase5 extendcontig:', time.time()
 contigpool = extendcontigtrans(contigpool)
 print 'phase6 assemblecontig:', time.time()
 contigpool=assemblecontigtrans(contigpool)
 print 'sixth contig set: materead'
 contigpool = materead(contigpool, nodepool, readpool)
 stacontig(contigpool)
 contigpool= postprocessingcontig(contigpool)


def findsmrna(infile1,infile2 ):
 gv= globals()
 if i_number==0:
  gv['i_number']=5
 print 'initial phase:', time.time()
# inputfile='clean-fa.txt'
 inputfile=infile1
 readlist=readfawfile(inputfile, 1000000000)
# readpool= inputread(infile1,infile2, i_number, t_number)
 rl=len(readlist)
 totalreads=sum([temp[1] for temp in readlist])
 print 'rl',rl
 print 'totalreads',totalreads
 readpool= [temp for temp in readlist if temp[1] >= i_number]
 readpool=[read(readtemp[0],readtemp[1]) for readtemp in readpool]
 rl=len(readpool)
# gv['kmerweightlist'] = getkmerweight(readlength)
 lkl=sum([readpool[i].kl for i in range(rl)])
 print 'phase1:', time.time()
 kmerpool=sbuildkpgraphtrans( readpool )
 nl =len(kmerpool)
 totalreads=sum([temp.weight for temp in readpool ])
 weightsum=sum([temp.weight for temp in kmerpool ])
 gv['idealnumber']=weightsum/nl
 print 'rl',rl
 print 'totalreads',totalreads
 print 'kmer pool number',lkl
 print 'nl',nl
 print 'idealnumber', idealnumber
 if p_mer==0:
  nodepool=[node([kmertemp]) for kmertemp in kmerpool]
  contigpool=[contig(nodetemp) for nodetemp in nodepool ]
 else:
  print 'phase2:', time.time()
  kmerl=lkl
  lkl=len(kmerpool)
#  nodepool=buildkpgraph( kmerpool )
  nodepool=buildkpgraphtranspc( kmerpool )
  nl =len(nodepool)
  gv['idealnumber']=weightsum/nl
  print 'kmer pool number',lkl
  print 'nl',nl
  print 'idealnumber', idealnumber
  print 'phase3:', time.time()
  contigpool=npreprocessingcontigtrans(nodepool)
 del kmerpool
 print 'phase5 extendcontig:', time.time()
 contigpool = extendcontigtrans(contigpool)
 print 'phase6 assemblecontig:', time.time()
 stacontig(contigpool)
# contigout=[contigtemp for contigtemp in contigpool if len(contigtemp.pre) or len(contigtemp.next) ]
# print 'pruned contig with assemble structure'
# stacontig(contigout)
# contigpool=list(set(contigpool)-set(contigout))
# print 'remaining contig:'
# stacontig(contigpool)
 constrain=21
 contigout=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) < constrain]
 print 'contig with length less than 21'
 print 'total reads:',sum([temp.weight for temp in contigout ])
 stacontig(contigout)
# contigpool=list(set(contigpool)-set(contigout))
# print 'remaining contig:'
# stacontig(contigpool)
 for i in range(19,22,1):
  contigpool=removetips(contigpool,i)
  contigpool=removeerroneousconnections(contigpool,i,i_number)
  contigpool=oldremovetips(contigpool,i)
  contigpool=oldremoveerroneousconnections(contigpool,i,i_number)
 contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= 21]
 contigpool= prunecontig(contigpool)
 print 'reassemble contig with length less than 21 :'
 print 'total reads:',sum([temp.weight for temp in contigpool ])
 stacontig(contigpool)
 print 'pruned contig with length more than 24'
 contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) <= 24 ]
 print 'total reads:',sum([temp.weight for temp in contigpool ])
 stacontig(contigpool)
# contigout=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) > 22 or len(contigtemp.sequence) < 21]
# print 'pruned contig with length out of 21, 22'
# readout=[readtemp for readtemp in readpool if len(readtemp.sequence) == 22 or len(readtemp.sequence) == 21]
# print 'number of read with length 21,22', len(readout)
# print 'weight of read with length 21,22', sum([readtemp.weight for readtemp in readout])
# print 'number of contig with length 21,22', len(contigpool)
# print 'weight of contig with length 21,22', sum([contigtemp.weight for contigtemp in contigpool])
 # contigpool=assemblecontigtrans(contigpool)
 print 'phase7 map contig:', time.time()
 mapsmrna(contigpool)


def assemblecontig2(contigpool, readpool, readlength):
#contig length constrain =100
# contiglengthconstrain=readlength-(readlength+1-k_mer)//2
# contiglengthconstrain=readlength
# contiglengthconstrain=100
# contigweightconstrain =idealconstrain( contigpool)
# contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or len(contigtemp.pre) or len(contigtemp.next) ]
# contigpool=[contigpool[i] for i in range(len(contigpool)) if (len(contigpool[i].sequence) >= contiglengthconstrain) or(( len(contigpool[i].pre) >0) and ( len(contigpool[i].next) >0))]
# contigpool=[contigpool[i] for i in range(len(contigpool)) if (len(contigpool[i].sequence) >= contiglengthconstrain) and(contigpool[i].weight >= idealnumber)]
# contigpool=[contigpool[i] for i in range(len(contigpool)) if (len(contigpool[i].sequence) >= contiglengthconstrain)]
# stacontig(contigpool)
# contigpool= prunecontig(contigpool)
# contigpool= removetips(contigpool, contiglengthconstrain)
# contigpool= removecontigs(contigpool, contiglengthconstrain)
# contigpool= refreshcontig(contigpool)
# contigpool= mergecontig(contigpool)
# contigpool= removeerroneousconnections(contigpool, contiglengthconstrain)
# contigpool = multidistancecontig(contigpool,distance):
# contigpool = materead(contigpool, readpool, readlength)
# contigpool = setnodeinfo( contigpool)
# contigpool=  removebubbles(contigpool,0.9)
 maxweight= 10 + max([contigtemp.weight for contigtemp in contigpool ])
 (largelength,largeweight)= max([(len(contigtemp.sequence),contigtemp.weight) for contigtemp in contigpool ])
 contigweightconstrain= largeweight* idealfraction1
 print 'initial contig:'
 stacontig(contigpool)
 print 'second contig:tips'
 contiglengthconstrain= k_mer +int( k_mer/3)
 contigpool= removetips(contigpool, contiglengthconstrain,contigweightconstrain)
 stacontig(contigpool)
 print 'fourth contig-erroneous connections:'
 contigpool= removeerroneousconnections(contigpool, contiglengthconstrain,contigweightconstrain)
 stacontig(contigpool)
 print 'second contig:tips'
 contiglengthconstrain= k_mer +int(2* k_mer/3)
 contigpool= removetips(contigpool, contiglengthconstrain,contigweightconstrain)
 stacontig(contigpool)
 print 'fourth contig-erroneous connections:'
 contigpool= removeerroneousconnections(contigpool, contiglengthconstrain,contigweightconstrain)
 stacontig(contigpool)
 print 'second contig:tips'
 contiglengthconstrain= k_mer +k_mer
 contigpool= removetips(contigpool, contiglengthconstrain,contigweightconstrain)
 stacontig(contigpool)
 print 'fourth contig-erroneous connections:'
 contigpool= removeerroneousconnections(contigpool, contiglengthconstrain,contigweightconstrain)
 stacontig(contigpool)
# print 'multidistance',i_mer
# contigpool = multidistancecontig(contigpool, i_mer)
# stacontig(contigpool)
# print 'sixth contig- materead'
# contigpool = setnodeinfo( contigpool)
# contigpool = materead(contigpool, readpool, readlength)
# stacontig(contigpool)
# print('third contig:bubbles')
# contigpool=  removebubbles(contigpool,0.9)
# stacontig(contigpool)
 print('first contig:contig length constrain =100')
 contiglengthconstrain=100
 contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or len(contigtemp.pre) or len(contigtemp.next) ]
 stacontig(contigpool)
 idealcontig=maxweight * idealfraction1
 print('fourth contig-contig constrain :',idealcontig)
 contiglengthconstrain= k_mer +int( k_mer/3)
 contigpool= removetips(contigpool, contiglengthconstrain,idealcontig)
 contigpool= removeerroneousconnections(contigpool, contiglengthconstrain,idealcontig)
 contiglengthconstrain= k_mer +int(2* k_mer/3)
 contigpool= removetips(contigpool, contiglengthconstrain,idealcontig)
 contigpool= removeerroneousconnections(contigpool, contiglengthconstrain,idealcontig)
 contiglengthconstrain= k_mer +k_mer
 contigpool= removetips(contigpool, contiglengthconstrain,idealcontig)
 contigpool= removeerroneousconnections(contigpool, contiglengthconstrain,idealcontig)
 stacontig(contigpool)
# print('sixth contig- materead')
# contigpool = setnodeinfo( contigpool)
# contigpool = materead(contigpool, readpool, readlength)
# stacontig(contigpool)
# print('third contig:bubbles')
# contigpool=  removebubbles(contigpool,0.9)
# stacontig(contigpool)
# print('sixth contig- materead')
# contigpool = setnodeinfo( contigpool)
# contigpool = materead(contigpool, readpool, readlength)
# stacontig(contigpool)
 print 'tips erroneousconnection', readlength
 contiglengthconstrain= readlength
 contigpool= removetips(contigpool, contiglengthconstrain,idealcontig)
 contigpool= removeerroneousconnections(contigpool, contiglengthconstrain,idealnumber)
 stacontig(contigpool)
 contiglengthconstrain=100
 print 'tips erroneousconnection', contiglengthconstrain
 contigpool= removetips(contigpool, contiglengthconstrain,idealcontig)
 contigpool= removeerroneousconnections(contigpool, contiglengthconstrain,idealcontig)
 stacontig(contigpool)
 idealcontig=maxweight
 print 'tips erroneousconnection', contiglengthconstrain
 contigpool= removetips(contigpool, contiglengthconstrain,idealcontig)
 contigpool= removeerroneousconnections(contigpool, contiglengthconstrain,idealcontig)
 stacontig(contigpool)
 print 'sixth contig set: materead'
 contigpool = materead(contigpool, readpool, readlength)
 stacontig(contigpool)
 meanlength=[len(contigtemp.sequence) for contigtemp in contigpool ]
 meanlength = sum(meanlength)/len(meanlength)
 idealcontig=largeweight * idealfraction1
 contiglengthconstrain=meanlength * idealfraction2
 print 'contiglength constrain contigweight constrain', contiglengthconstrain, idealnumber
 contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or len(contigtemp.pre) or len(contigtemp.next) or contigtemp.weight >= idealnumber ]
 stacontig(contigpool)
 contigpool= removetips(contigpool, contiglengthconstrain,idealnumber)
 contigpool= removeerroneousconnections(contigpool, contiglengthconstrain,idealnumber)
 stacontig(contigpool)
 print 'contiglength constrain contigweight constrain', contiglengthconstrain, idealcontig
 contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or len(contigtemp.pre) or len(contigtemp.next) or contigtemp.weight >= idealcontig ]
 stacontig(contigpool)
 contigpool= removetips(contigpool, contiglengthconstrain,idealcontig)
 contigpool= removeerroneousconnections(contigpool, contiglengthconstrain,idealcontig)
 stacontig(contigpool)
 print 'sixth contig set: materead'
 contigpool = materead(contigpool, readpool, readlength)
 stacontig(contigpool)
 contigpool = arrangecontig( contigpool)
 buffer=listcontig(contigpool)
 writecfafile(outname+'_pre_contig.fa', contigpool)
 print('final contig: 100')
 contiglengthconstrain=100
 contigpool= removecontigs(contigpool, contiglengthconstrain,idealnumber)
 stacontig(contigpool)
 contigpool= removecontigs(contigpool, contiglengthconstrain,idealcontig)
 stacontig(contigpool)
# contigpool= removecontigs(contigpool, contiglengthconstrain,idealnumber)
# stacontig(contigpool)
# contigpool= removecontigs(contigpool, contiglengthconstrain,idealcontig)
# stacontig(contigpool)
#print result
 print 'start show contig:',time.time()
 print('contig:')
 print buffer,
 print('finalcontig:')
 showcontigstructure(contigpool)
 writecfafile(outname+'_contig.fa', contigpool)
 print('end show contig:',time.time())
 return contigpool

def assemblecontig1(contigpool,nodepool):
 if homogeneousparameter == -1:
  globals()['homogeneousparameter']=1.0
 print 'homogeneous parameter:', homogeneousparameter
 print 'rlparameter',rlparameter
 print 'rwparameter',rwparameter
 print 'initial contig set:'
 stacontig(contigpool)
 print 'first contig set:contig length constrain =100'
 contiglengthconstrain=100
 contigpool=[contigtemp for contigtemp in contigpool if (len(contigtemp.sequence) >= contiglengthconstrain) or len(contigtemp.pre) or len(contigtemp.next)]
 (l,w)=stacontig(contigpool)
 switch()
 print 'Trim low-frequency edges.' 
 stapath(contigpool)
 switch()
 if reference:
  mapref(contigpool,reference)
 [contigpool,nodepool] =fourceremovebubbles(contigpool,nodepool)
 stacontig(contigpool)
 print 'second contig set: contig weight constrain'
 contiglengthconstrain=k_mer-1
 contigpool= removecontigs(contigpool, contiglengthconstrain, min([w*0.5,idealnumber*0.6])*homogeneousparameter)
 stacontig(contigpool)
 if reference:
  mapref(contigpool,reference)
# [contigpool,nodepool] =fourceremovebubbles(contigpool,nodepool)
# stacontig(contigpool)
# print('second contig:tips')
# contiglengthconstrain=readlength-(readlength+1-k_mer)//2
# contiglengthconstrain=(2*k_mer)-1
 if tips:
  print 'third contig set: remove tips, bubbles, and erroneous connections'
  [contigpool,nodepool]= fourceremove(contigpool, nodepool, rlparameter, rwparameter)
 # contigpool= fourceremovetips(contigpool, 0.4)
  contiglengthconstrain=100
  contigpool=[contigtemp for contigtemp in contigpool if (len(contigtemp.sequence) >= contiglengthconstrain) or len(contigtemp.pre) or len(contigtemp.next)]
  stacontig(contigpool)
 if reference:
  mapref(contigpool,reference)
# print('third contig:')
# contigpool=removebubbles(contigpool, bubbleidentity)
# stacontig(contigpool)
# print('fourth contig-erroneous connections:')
# contiglengthconstrain=readlength-(readlength+1-k_mer)//2
# contigpool= fourceremoveerroneousconnections(contigpool, 0.4, 1.0)
# contiglengthconstrain=100
# contigpool=[contigtemp for contigtemp in contigpool if (len(contigtemp.sequence) >= contiglengthconstrain) or len(contigtemp.pre) or len(contigtemp.next)]
# stacontig(contigpool)
 switch()
 print 'Prune tips, bubbles, and erroneous connections.'
 stapath(contigpool)
 switch()
 print 'fourth contig set: link multidistance',k_mer-m_mer,'at least',m_mer
 contigpool = multidistancecontig(contigpool, k_mer-m_mer)
# stacontig(contigpool)
# if repeatparameter > 0:
#  print 'remove repeat:'
#  contigpool=removerepeat(contigpool)
 [contigpool,nodepool] =fourceremovebubbles(contigpool,nodepool)
 (l,w)=stacontig(contigpool)
 if reference:
  mapref(contigpool,reference)
 print 'fifth contig set: imerconstrain'
 contiglengthconstrain=k_mer+i_mer-1
 contigpool= removecontigs(contigpool, contiglengthconstrain, min([w*0.6,idealnumber*0.8])*homogeneousparameter)
 stacontig(contigpool)
 if reference:
  mapref(contigpool,reference)
 return contigpool

def postprocessingcontig(contigpool):
 contiglengthconstrain=minlen
 contigpool= oldremovetips(contigpool, contiglengthconstrain)
 stacontig(contigpool)
 contigpool= oldremoveerroneousconnections(contigpool, contiglengthconstrain,idealnumber*0.6)
 stacontig(contigpool)
# contigpool = arrangecontig( contigpool)
# buffer=listcontig(contigpool)
# writecfafile(outname+'_pre_contig.fa', contigpool)
 print 'final contig set:'
 contigpool=[contigtemp for contigtemp in contigpool if (len(contigtemp.sequence) >= contiglengthconstrain)]
# contigpool=[contigtemp for contigtemp in contigpool if (len(contigtemp.sequence) >= contiglengthconstrain) or len(contigtemp.pre) or len(contigtemp.next)]
 contigpool= prunecontig(contigpool)
 stacontig(contigpool)
 contigpool = arrangecontig( contigpool)
#print result
# print 'start show contig:',time.time()
# print 'contig set:'
# print buffer,
 print 'finalcontig set:'
 showcontigstructure(contigpool)
 switch()
 print 'Finish clean graph.'
 print 'Contig set:'
 stacontig(contigpool)
 switch()
 writecfafile(outname+'_contig.fasta', contigpool)
# print 'end show contig:',time.time()
 return contigpool

def assemblecontigtrans(contigpool):
 if homogeneousparameter == -1:
  globals()['homogeneousparameter']=0.4
 print 'homogeneous parameter:', homogeneousparameter
 print 'rlparameter',rlparameter
 print 'rwparameter',rwparameter
 print 'initial contig set:'
 stacontig(contigpool)
 print 'first contig set:contig length constrain =100'
 contiglengthconstrain=100
 contigpool=[contigtemp for contigtemp in contigpool if (len(contigtemp.sequence) >= contiglengthconstrain) or len(contigtemp.pre) or len(contigtemp.next)]
 (l,w)=stacontig(contigpool)
 print 'second contig set: contig weight constrain'
 contiglengthconstrain=k_mer-1
 contigpool= removecontigs(contigpool, contiglengthconstrain, min([w*0.5,idealnumber*0.6])*homogeneousparameter)
 stacontig(contigpool)
# print('second contig:tips')
# contiglengthconstrain=readlength-(readlength+1-k_mer)//2
# contiglengthconstrain=(2*k_mer)-1
 print 'third contig set: remove tips and erroneous connections'
 contigpool= fourceremove(contigpool, rlparameter, rwparameter)
# contigpool= fourceremovetips(contigpool, 0.4)
 contiglengthconstrain=100
 contigpool=[contigtemp for contigtemp in contigpool if (len(contigtemp.sequence) >= contiglengthconstrain) or len(contigtemp.pre) or len(contigtemp.next)]
 stacontig(contigpool)
# print('third contig:')
# contigpool=removebubbles(contigpool, bubbleidentity)
# stacontig(contigpool)
# print('fourth contig-erroneous connections:')
# contiglengthconstrain=readlength-(readlength+1-k_mer)//2
# contigpool= fourceremoveerroneousconnections(contigpool, 0.4, 1.0)
# contiglengthconstrain=100
# contigpool=[contigtemp for contigtemp in contigpool if (len(contigtemp.sequence) >= contiglengthconstrain) or len(contigtemp.pre) or len(contigtemp.next)]
# stacontig(contigpool)
 print 'fourth contig set: link multidistance',k_mer-m_mer,'at least',m_mer
 contigpool = multidistancecontig(contigpool, k_mer-m_mer)
 (l,w)=stacontig(contigpool)
 print 'fifth contig set: imerconstrain'
 contiglengthconstrain=k_mer+i_mer-1
 contigpool= removecontigs(contigpool, contiglengthconstrain, min([w*0.6,idealnumber*0.8])*homogeneousparameter)
 stacontig(contigpool)
 return contigpool

def extendcontig(contigpool):
 """Refresh the contigs, link them at distance 1 and merge the linked ones.

 Returns the pool produced by mergecontig.
 """
 refreshed=refreshcontig(contigpool)
 linked=linkcontig(refreshed,1)
 return mergecontig(linked,1)

def extendcontigtrans(contigpool):
 contigpool=refreshcontig(contigpool)
 print 'link contig dg:',time.time()
 contigpool= linkcontigtrans(contigpool,1)
 print 'merge contig:',time.time()
 contigpool= mergecontig(contigpool,1)
 return contigpool

def multidistancecontig(contigpool,distance):
 """Link and merge contigs at distances 2..distance, then once more at distance 1.

 Before each distance, contigs that already have both a predecessor and a
 successor are set aside; only the remaining ones are refreshed, linked
 (linkcontig), reduced (reducelink) and merged at that distance. The
 set-aside contigs are added back at the end, the pool is cleaned and
 refreshed, and a final distance-1 link and merge is performed.

 Returns the resulting contig pool.
 """
 setaside=[]
 for step in range(2,distance+1):
  connected=[contigtemp for contigtemp in contigpool if len(contigtemp.pre) and len(contigtemp.next)]
  setaside.extend(connected)
  connectedset=set(connected)
  openended=[contigtemp for contigtemp in contigpool if contigtemp not in connectedset]
  openended=refreshcontig(openended)
  openended=linkcontig(openended,step)
  openended=reducelink(openended)
  contigpool=mergecontig(openended,step)
 contigpool.extend(setaside)
 contigpool=refreshcontig(cleancontig(contigpool))
 return mergecontig(linkcontig(contigpool,1),1)

def multidistancecontigtrans(contigpool,distance):
 """Trans variant of the multi-distance linking: distances 2..distance, then distance 1.

 Before each distance, contigs that already have both a predecessor and a
 successor are set aside; only the remaining ones are refreshed, linked
 (linkcontigtrans), reduced (reducelink) and merged at that distance. The
 set-aside contigs are added back at the end, the pool is cleaned and
 refreshed, and a final distance-1 link and merge is performed.

 Returns the resulting contig pool.
 """
 setaside=[]
 for step in range(2,distance+1):
  connected=[contigtemp for contigtemp in contigpool if len(contigtemp.pre) and len(contigtemp.next)]
  setaside.extend(connected)
  connectedset=set(connected)
  openended=[contigtemp for contigtemp in contigpool if contigtemp not in connectedset]
  openended=refreshcontig(openended)
  openended=linkcontigtrans(openended,step)
  openended=reducelink(openended)
  contigpool=mergecontig(openended,step)
 contigpool.extend(setaside)
 contigpool=refreshcontig(cleancontig(contigpool))
 return mergecontig(linkcontigtrans(contigpool,1),1)

def linkcycliccontig( contigpool, distance ):
 """Link contigs whose ends overlap by k_mer-distance characters, self links included.

 Every contig registers two head seeds: the first k_mer-distance characters
 of its sequence tagged 's' and of its reverse tagged 'r'. Each contig's
 sequence tail is then looked up to fill next/nextdirection and its reverse
 tail to fill pre/predirection. A direction is True when the match is on
 the same strand tag ('s' for next, 'r' for pre) and False otherwise.
 No candidate is filtered out, so a contig may end up linked to itself.

 The contigs are modified in place and the same pool is returned.
 """
 overlap=k_mer-distance

 def headseeds(contigtemp):
  return (contigtemp.sequence[:overlap]+'s', contigtemp.reverse[:overlap]+'r')

 # First pass: count how many contigs start with each seed.
 seedcount={}
 for contigtemp in contigpool:
  for seed in headseeds(contigtemp):
   seedcount[seed]=seedcount.get(seed,0)+1
 # sdg turns the counts into (start, cursor) spans plus a slot array
 # (shape inferred from the unpacking below -- TODO confirm in sdg).
 (spans,slots)=sdg(seedcount)
 # Second pass: drop every contig into the next free slot of its seeds.
 for contigtemp in contigpool:
  for seed in headseeds(contigtemp):
   (first,cursor)=spans[seed]
   slots[cursor]=contigtemp
   spans[seed]=(first,cursor+1)

 def owners(seed):
  # Contigs registered under seed, or an empty list for an unknown seed.
  if seed not in spans:
   return []
  (first,stop)=spans[seed]
  return slots[first:stop]

 for contigtemp in contigpool:
  tail=contigtemp.sequence[-overlap:]
  for (tag,direction) in (('s',True),('r',False)):
   found=owners(tail+tag)
   contigtemp.next.extend(found)
   contigtemp.nextdirection.extend([direction]*len(found))
  tail=contigtemp.reverse[-overlap:]
  for (tag,direction) in (('r',True),('s',False)):
   found=owners(tail+tag)
   contigtemp.pre.extend(found)
   contigtemp.predirection.extend([direction]*len(found))
 return contigpool

def linkcycliccontigtrans( contigpool, distance ):
 """Same-strand-only linking of contigs overlapping by k_mer-distance characters.

 Every contig registers two head seeds: the first k_mer-distance characters
 of its sequence tagged 's' and of its reverse tagged 'r'. Only same-tag
 matches are used: the sequence tail is looked up with 's' to fill
 next/nextdirection and the reverse tail with 'r' to fill pre/predirection,
 so every recorded direction is True. No candidate is filtered out, so a
 contig may end up linked to itself.

 The contigs are modified in place and the same pool is returned.
 """
 overlap=k_mer-distance

 def headseeds(contigtemp):
  return (contigtemp.sequence[:overlap]+'s', contigtemp.reverse[:overlap]+'r')

 # First pass: count how many contigs start with each seed.
 seedcount={}
 for contigtemp in contigpool:
  for seed in headseeds(contigtemp):
   seedcount[seed]=seedcount.get(seed,0)+1
 # sdg turns the counts into (start, cursor) spans plus a slot array
 # (shape inferred from the unpacking below -- TODO confirm in sdg).
 (spans,slots)=sdg(seedcount)
 # Second pass: drop every contig into the next free slot of its seeds.
 for contigtemp in contigpool:
  for seed in headseeds(contigtemp):
   (first,cursor)=spans[seed]
   slots[cursor]=contigtemp
   spans[seed]=(first,cursor+1)

 def owners(seed):
  # Contigs registered under seed, or an empty list for an unknown seed.
  if seed not in spans:
   return []
  (first,stop)=spans[seed]
  return slots[first:stop]

 for contigtemp in contigpool:
  successors=owners(contigtemp.sequence[-overlap:]+'s')
  contigtemp.next.extend(successors)
  contigtemp.nextdirection.extend([True]*len(successors))
  predecessors=owners(contigtemp.reverse[-overlap:]+'r')
  contigtemp.pre.extend(predecessors)
  contigtemp.predirection.extend([True]*len(predecessors))
 return contigpool

def linkscaffold( contigpool):
 """Link contigs overlapping by k_mer-1 characters and record a distance of 0 per link.

 Every contig registers two head seeds: the first k_mer-1 characters of its
 sequence tagged 's' and of its reverse tagged 'r'. The sequence tail fills
 next/nextdirection/nextdistance and the reverse tail fills
 pre/predirection/predistance. A direction is True for a same-tag match
 ('s' for next, 'r' for pre) and False for a cross-tag match; in the
 cross-tag case the contig itself is left out of its own candidates.

 The contigs are modified in place and the same pool is returned.
 """
 overlap=k_mer-1

 def headseeds(contigtemp):
  return (contigtemp.sequence[:overlap]+'s', contigtemp.reverse[:overlap]+'r')

 # First pass: count how many contigs start with each seed.
 seedcount={}
 for contigtemp in contigpool:
  for seed in headseeds(contigtemp):
   seedcount[seed]=seedcount.get(seed,0)+1
 # sdg turns the counts into (start, cursor) spans plus a slot array
 # (shape inferred from the unpacking below -- TODO confirm in sdg).
 (spans,slots)=sdg(seedcount)
 # Second pass: drop every contig into the next free slot of its seeds.
 for contigtemp in contigpool:
  for seed in headseeds(contigtemp):
   (first,cursor)=spans[seed]
   slots[cursor]=contigtemp
   spans[seed]=(first,cursor+1)

 def owners(seed, skip=None):
  # Contigs registered under seed (minus skip, compared by identity),
  # or an empty list for an unknown seed.
  if seed not in spans:
   return []
  (first,stop)=spans[seed]
  found=slots[first:stop]
  if skip is not None:
   found=[other for other in found if other is not skip]
  return found

 for contigtemp in contigpool:
  tail=contigtemp.sequence[-overlap:]
  for (found,direction) in ((owners(tail+'s'),True),(owners(tail+'r',contigtemp),False)):
   contigtemp.next.extend(found)
   contigtemp.nextdirection.extend([direction]*len(found))
   contigtemp.nextdistance.extend([0]*len(found))
  tail=contigtemp.reverse[-overlap:]
  for (found,direction) in ((owners(tail+'r'),True),(owners(tail+'s',contigtemp),False)):
   contigtemp.pre.extend(found)
   contigtemp.predirection.extend([direction]*len(found))
   contigtemp.predistance.extend([0]*len(found))
 return contigpool

def linkcontig( contigpool, distance ):
 """Link contigs whose ends overlap by k_mer-distance characters, on either strand.

 Every contig registers two head seeds: the first k_mer-distance characters
 of its sequence tagged 's' and of its reverse tagged 'r'. The sequence tail
 fills next/nextdirection and the reverse tail fills pre/predirection.
 A direction is True for a same-tag match ('s' for next, 'r' for pre) and
 False for a cross-tag match; in the cross-tag case the contig itself is
 left out of its own candidates.

 The contigs are modified in place and the same pool is returned.
 """
 overlap=k_mer-distance

 def headseeds(contigtemp):
  return (contigtemp.sequence[:overlap]+'s', contigtemp.reverse[:overlap]+'r')

 # First pass: count how many contigs start with each seed.
 seedcount={}
 for contigtemp in contigpool:
  for seed in headseeds(contigtemp):
   seedcount[seed]=seedcount.get(seed,0)+1
 # sdg turns the counts into (start, cursor) spans plus a slot array
 # (shape inferred from the unpacking below -- TODO confirm in sdg).
 (spans,slots)=sdg(seedcount)
 # Second pass: drop every contig into the next free slot of its seeds.
 for contigtemp in contigpool:
  for seed in headseeds(contigtemp):
   (first,cursor)=spans[seed]
   slots[cursor]=contigtemp
   spans[seed]=(first,cursor+1)

 def owners(seed, skip=None):
  # Contigs registered under seed (minus skip, compared by identity),
  # or an empty list for an unknown seed.
  if seed not in spans:
   return []
  (first,stop)=spans[seed]
  found=slots[first:stop]
  if skip is not None:
   found=[other for other in found if other is not skip]
  return found

 for contigtemp in contigpool:
  tail=contigtemp.sequence[-overlap:]
  for (found,direction) in ((owners(tail+'s'),True),(owners(tail+'r',contigtemp),False)):
   contigtemp.next.extend(found)
   contigtemp.nextdirection.extend([direction]*len(found))
  tail=contigtemp.reverse[-overlap:]
  for (found,direction) in ((owners(tail+'r'),True),(owners(tail+'s',contigtemp),False)):
   contigtemp.pre.extend(found)
   contigtemp.predirection.extend([direction]*len(found))
 return contigpool

def linkcontigtrans( contigpool, distance ):
 """Same-strand-only linking of contigs overlapping by k_mer-distance characters.

 Every contig registers two head seeds: the first k_mer-distance characters
 of its sequence tagged 's' and of its reverse tagged 'r'. Only same-tag
 matches are used: the sequence tail is looked up with 's' to fill
 next/nextdirection and the reverse tail with 'r' to fill pre/predirection,
 so every recorded direction is True. Candidates are not filtered, so a
 contig may be linked to itself.

 The contigs are modified in place and the same pool is returned.
 """
 overlap=k_mer-distance

 def headseeds(contigtemp):
  return (contigtemp.sequence[:overlap]+'s', contigtemp.reverse[:overlap]+'r')

 # First pass: count how many contigs start with each seed.
 seedcount={}
 for contigtemp in contigpool:
  for seed in headseeds(contigtemp):
   seedcount[seed]=seedcount.get(seed,0)+1
 # sdg turns the counts into (start, cursor) spans plus a slot array
 # (shape inferred from the unpacking below -- TODO confirm in sdg).
 (spans,slots)=sdg(seedcount)
 # Second pass: drop every contig into the next free slot of its seeds.
 for contigtemp in contigpool:
  for seed in headseeds(contigtemp):
   (first,cursor)=spans[seed]
   slots[cursor]=contigtemp
   spans[seed]=(first,cursor+1)

 def owners(seed):
  # Contigs registered under seed, or an empty list for an unknown seed.
  if seed not in spans:
   return []
  (first,stop)=spans[seed]
  return slots[first:stop]

 for contigtemp in contigpool:
  successors=owners(contigtemp.sequence[-overlap:]+'s')
  contigtemp.next.extend(successors)
  contigtemp.nextdirection.extend([True]*len(successors))
  predecessors=owners(contigtemp.reverse[-overlap:]+'r')
  contigtemp.pre.extend(predecessors)
  contigtemp.predirection.extend([True]*len(predecessors))
 return contigpool

def removecontigs(contigpool, contiglengthconstrain,contigweightconstrain):
 i=-1
 ncl=len(contigpool)
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
  contigpool=[contigtemp for contigtemp in contigpool if (len(contigtemp.sequence) >= contiglengthconstrain) and (contigtemp.weight >= contigweightconstrain)]
  contigpool= prunecontig(contigpool)
  ncl=len(contigpool)
 print 'round:',i
 return contigpool

def oldremoveerroneousconnections(contigpool, contiglengthconstrain,contigweightconstrain):
 i=-1
 ncl=len(contigpool)
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
#  contigpool=[contigtemp for contigtemp in contigpool if (len(contigtemp.sequence) >= contiglengthconstrain) and(contigtemp.weight >= contigweightconstrain)]
  contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or contigtemp.weight >= contigweightconstrain]
  contigpool= prunecontig(contigpool)
  ncl=len(contigpool)
 print 'round:',i
 return contigpool

def oldremovetips(contigpool, contiglengthconstrain):
 i=-1
 ncl=len(contigpool)
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
  contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or (len(contigtemp.pre) and len(contigtemp.next))]
  contigpool= prunecontig(contigpool)
  ncl=len(contigpool)
 print 'round',i
 return contigpool

def removeerroneousconnections(contigpool, contiglengthconstrain,contigweightconstrain):
 i=-1
 ncl=len(contigpool)
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
  contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or (len(contigtemp.pre) > 1) or (len(contigtemp.next) > 1) or contigtemp.weight >= contigweightconstrain]
  contigpool= prunecontig(contigpool)
  ncl=len(contigpool)
 print 'round:',i
 return contigpool

def fourceremoveerroneousconnections_o(contigpool, fraction1, fraction2):
 i=-1
 kmm=k_mer -1
 ncl=len(contigpool)
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
  contiglen=[len(contigtemp.sequence) for contigtemp in contigpool]
  totalnode=sum([temp-kmm for temp in contiglen ])
  totalweight=sum([contigtemp.sum for contigtemp in contigpool])
  if totalnode ==0:
   contiglengthconstrain=0
   contigweightconstrain=0
  else:
   contiglengthconstrain= (sum(contiglen)*fraction1)/cl
   contigweightconstrain=(totalweight*fraction2)/totalnode
  contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or (len(contigtemp.pre) > 1) or (len(contigtemp.next) > 1) or contigtemp.weight >= contigweightconstrain]
  contigpool= prunecontig(contigpool)
  ncl=len(contigpool)
 print 'round:',i
 return contigpool

def fourceremoveerroneousconnections(contigpool, fraction1, fraction2):
 def cmps( a,b):
  if a.sum < b.sum:
   return 1
  if a.sum > b.sum:
   return -1
  if a.weight < b.weight:
   return 1
  return -1
 i=-1
 kmm=k_mer -1
 ncl=len(contigpool)
 print 'remove erroneous connections: number contig',ncl
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
  contiglen=[len(contigtemp.sequence) for contigtemp in contigpool]
  totalnode=sum([temp-kmm for temp in contiglen ])
  totalweight=sum([contigtemp.sum for contigtemp in contigpool])
  if totalnode ==0:
   contiglengthconstrain=0
   contigweightconstrain=0
  elif fraction1==0 and fraction2 ==0:
   contiglengthconstrain=2*k_mer
   contigweightconstrain=c_number
  else:
   contiglengthconstrain= (sum(contiglen)*fraction1)/cl
   contigweightconstrain=(totalweight*fraction2)/totalnode
  for contigtemp in contigpool:
   if len(contigtemp.sequence) >= contiglengthconstrain or (len(contigtemp.pre) > 1) or (len(contigtemp.next) > 1) or contigtemp.weight >= contigweightconstrain:
    contigtemp.fresh=1
   else:
    contigtemp.fresh=0
  for contigtemp in contigpool:
   temppool=[temp for temp in contigtemp.pre if temp.fresh!=1]
   if len(temppool)>= 2:
    temppool.sort(cmps)
    temppool[0].fresh=2
   temppool=[temp for temp in contigtemp.next if temp.fresh!=1]
   if len(temppool)>= 2:
    temppool.sort(cmps)
    temppool[0].fresh=2
  contigpool=[contigtemp for contigtemp in contigpool if contigtemp.fresh!=0 ]
  contigpool= refreshcontig(contigpool)
  contigpool= prunecontig(contigpool)
  ncl=len(contigpool)
 print 'number contig',ncl
 print 'round',i
 return contigpool

def fourceremove(contigpool, nodepool,fraction1, fraction2):
 i=-1
 ncl=len(contigpool)
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
  contigpool=fourceremovetips(contigpool, fraction1)
#  [contigpool,nodepool] =fourceremovebubbles(contigpool,nodepool,fraction1)
  contigpool=fourceremoveerroneousconnections(contigpool, fraction1, fraction2)
  ncl=len(contigpool)
 print 'round:',i
 return [contigpool,nodepool]

def fourceprune(contigpool, fraction1, fraction2):
 i=-1
 kmm=k_mer -1
 ncl=len(contigpool)
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
  totallength=sum([len(contigtemp.sequence) for contigtemp in contigpool ])
  if cl:
   contiglengthconstrain= (totallength*fraction1)/cl
  else:
   contiglengthconstrain=0
  contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or (len(contigtemp.pre) + len(contigtemp.next) > 1)]
  contigpool= prunecontig(contigpool)
  ncl=len(contigpool)
  contiglen=[len(contigtemp.sequence) for contigtemp in contigpool]
  totalnode=sum([temp-kmm for temp in contiglen ])
  totalweight=sum([contigtemp.sum for contigtemp in contigpool])
  if totalnode ==0:
   contiglengthconstrain=0
   contigweightconstrain=0
  else:
   contiglengthconstrain= (sum(contiglen)*fraction1)/ncl
   contigweightconstrain=(totalweight*fraction2)/totalnode
  contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or (len(contigtemp.pre) > 1) or (len(contigtemp.next) > 1) or contigtemp.weight >= contigweightconstrain]
  contigpool= prunecontig(contigpool)
  ncl=len(contigpool)
 print 'round:',i
 return contigpool

def removetips(contigpool, contiglengthconstrain):
 i=-1
 ncl=len(contigpool)
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
  contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or (len(contigtemp.pre) + len(contigtemp.next) > 1)]
  contigpool= prunecontig(contigpool)
  ncl=len(contigpool)
 print 'round',i
 return contigpool

def fourceremovetips_o(contigpool, fraction):
 i=-1
 ncl=len(contigpool)
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
  totallength=sum([len(contigtemp.sequence) for contigtemp in contigpool ])
  if cl:
   contiglengthconstrain= (totallength*fraction)/cl
  else:
   contiglengthconstrain=0
  contigpool=[contigtemp for contigtemp in contigpool if len(contigtemp.sequence) >= contiglengthconstrain or (len(contigtemp.pre) + len(contigtemp.next) > 1)]
  contigpool= prunecontig(contigpool)
  ncl=len(contigpool)
 print 'round',i
 return contigpool

def fourceremovetips(contigpool, fraction):
 def cmps( a,b):
  if len(a.sequence) < len(b.sequence):
   return 1
  if len(a.sequence) > len(b.sequence):
   return -1
  if a.weight < b.weight:
   return 1
  return -1
 i=-1
 ncl=len(contigpool)
 print 'remove tips: number contig',ncl
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
  totallength=sum([len(contigtemp.sequence) for contigtemp in contigpool ])
  if fraction==0:
   contiglengthconstrain= 2*k_mer
  elif cl:
   contiglengthconstrain= (totallength*fraction)/cl
  else:
   contiglengthconstrain=0
  for contigtemp in contigpool:
   if len(contigtemp.sequence) >= contiglengthconstrain or (len(contigtemp.pre) + len(contigtemp.next) > 1):
    contigtemp.fresh=True
   else:
    contigtemp.fresh=False
  for contigtemp in contigpool:
   temppool=[temp for temp in contigtemp.pre if not temp.fresh]
   if len(temppool)>= 2:
    temppool.sort(cmps)
    temppool[0].fresh=True
   temppool=[temp for temp in contigtemp.next if not temp.fresh]
   if len(temppool)>= 2:
    temppool.sort(cmps)
    temppool[0].fresh=True
  contigpool=[contigtemp for contigtemp in contigpool if contigtemp.fresh ]
  contigpool= prunecontig(contigpool)
  ncl=len(contigpool)
 print 'number contig',ncl
 print 'round',i
 return contigpool

def fourceremovebubbles(contigpool, nodepool,fraction=None):
 i=-1
 ncl=len(contigpool)
 cl=ncl+1
 while ncl<cl:
  i+=1
  cl=ncl
  totallength=sum([len(contigtemp.sequence) for contigtemp in contigpool ])
  if fraction:
   if cl:
    contiglengthconstrain= (totallength*fraction)/cl
   else:
    contiglengthconstrain=0
  else:
   contiglengthconstrain=fraction
  [contigpool,nodepool] =removebubbles(contigpool,nodepool,contiglengthconstrain)
  ncl=len(contigpool)
 print 'round',i
 return [contigpool,nodepool]

def resetreadnode(nodepool):
 """Redirect zero-count node entries to the node stored in their contig.

 Every entry whose item 0 equals 0 is replaced, in place, by
 entry[1].nodelist[entry[2]].  Returns the same nodepool list.
 """
 for position, entry in enumerate(nodepool):
  if entry[0]==0:
   nodepool[position]=entry[1].nodelist[entry[2]]
 return nodepool

def removebubbles(contigpool,nodepool,constrain=None):
 """Merge near-identical parallel contigs ("bubbles") of equal length.

 Steps:
  1. refreshcontig (defined elsewhere in this file) is applied to the pool.
  2. When constrain is truthy, contigs whose sequence length is at least
     constrain are set aside and re-appended untouched at the end.
  3. ssetnodeinfo stores (contig, index) in every node of the remaining
     contigs, which are then sorted by ascending length (ties by .sum).
  4. Each run of equal-length contigs with more than one member goes to
     fastmergebubble with an allowed distance of
     int((1.0 - bubbleidentity) * length).
  5. resetreadnode redirects the node entries that were zeroed by the
     merge, contigs whose .fresh flag is false are dropped, and the pool
     goes through prunecontig (defined elsewhere in this file).

 Fix: the final run of equal-length contigs used to be skipped, because a
 run was only handed to fastmergebubble when a contig of a different
 length followed it.  The last run is now flushed after the loop.

 Returns [contigpool, nodepool].
 """
 def cmps( a,b):
  # Ascending by length, then by .sum; deliberately never returns 0 so the
  # resulting order is the same as it has always been.
  if len(a.sequence) < len(b.sequence):
   return -1
  if len(a.sequence) > len(b.sequence):
   return 1
  if a.sum <= b.sum:
   return -1
  return 1
 contigpool=refreshcontig(contigpool)
 if constrain:
  contigpool1=[ contigtemp for contigtemp in contigpool if len(contigtemp.sequence)>=constrain]
  contigset1 = set(contigpool1)
  contigpool = [contigtemp for contigtemp in contigpool if contigtemp not in contigset1]
 contigpool = ssetnodeinfo( contigpool)
 contigpool.sort(cmps)
 pool=[]
 lengthtemp=0
 for contigtemp in contigpool:
  lc= len(contigtemp.sequence)
  if lc ==lengthtemp:
   pool.append(contigtemp)
  else:
   # A new length starts: process the run that just ended.
   if len(pool) >1:
    fastmergebubble(pool, int((1.0-bubbleidentity)*lengthtemp))
   pool=[contigtemp]
   lengthtemp=lc
 # Flush the last run; nothing of a different length follows it.
 if len(pool) >1:
  fastmergebubble(pool, int((1.0-bubbleidentity)*lengthtemp))
 nodepool =resetreadnode(nodepool)
 contigpool=[contigtemp for contigtemp in contigpool if contigtemp.fresh]
 if constrain:
  contigpool+=contigpool1
 contigpool=prunecontig(contigpool)
 return [contigpool,nodepool]

def mergebubble0(pool,constrain):
 """No-op bubble merger: return pool unchanged; constrain is ignored."""
 return pool

def checkset(p, pd, d = True ):
 """Return the set of (p[i], flag) pairs for the parallel lists p and pd.

 flag is pd[i] itself when d is true and `not pd[i]` otherwise.  An empty
 p gives an empty set.
 """
 positions=range(len(p))
 if d:
  return set((p[i], pd[i]) for i in positions)
 return set((p[i], not pd[i]) for i in positions)

def contigindistance(ca,cb,constrain):
 """Tell whether cb is a bubble partner of ca, and in which orientation.

 Returns True when withindistance(ca.sequence, cb.sequence, constrain)
 holds and both contigs have the same set of (neighbour, direction) pairs
 on the pre side and on the next side.  Returns False when the same holds
 against cb.reverse with cb's pre/next swapped and its directions negated.
 Returns None otherwise.  withindistance is defined elsewhere in this file.
 """
 if withindistance(ca.sequence,cb.sequence,constrain):
  samepre=checkset(ca.pre,ca.predirection,True)==checkset(cb.pre,cb.predirection,True)
  if samepre and (checkset(ca.next, ca.nextdirection, True)==checkset(cb.next, cb.nextdirection,True)):
   return True
 if withindistance(ca.sequence,cb.reverse,constrain):
  # Reverse match: ca's pre side must mirror cb's next side and vice versa.
  mirrorpre=checkset(ca.pre, ca.predirection, True)==checkset(cb.next, cb.nextdirection, False)
  if mirrorpre and (checkset(ca.next, ca.nextdirection,True)==checkset(cb.pre, cb.predirection, False)):
   return False
 return None

def reducelink(contigpool):
 """Remove duplicated (neighbour, direction) links from every contig.

 For the pre side and the next side of each contig: when the link list has
 more than one entry and contains a repeated neighbour, every entry that
 is not the first occurrence of its (neighbour, direction) pair (as found
 by findpair) is popped from both parallel lists.  Works in place and
 returns contigpool.
 """
 def dedupe(links,directions):
  count=len(links)
  if count <= 1:
   return
  if len(set(links)) >= count:
   # No neighbour occurs twice, nothing to remove.
   return
  # Walk backwards so popping does not shift the indices still to visit.
  for position in range(count-1, -1, -1):
   if findpair( links, directions, links[position], directions[position] ) != position:
    links.pop(position)
    directions.pop(position)
 for contig in contigpool:
  dedupe(contig.pre,contig.predirection)
  dedupe(contig.next,contig.nextdirection)
 return contigpool

def findpair( fl, sl, fo, so):
 """Return the first index j with fl[j] == fo and sl[j] == so, else -1.

 fl and sl are parallel lists (in this file: neighbour contigs and their
 direction flags).  The unreachable debug dump that used to follow the
 final return has been removed; the result is unchanged.
 """
 for j in range(len(fl)):
  if fl[j] == fo and sl[j] == so:
   return j
 return -1

def idealconstrain( contigpool):
 """Return the mean .sum per node over the pool, rounded up to an int.

 The result is ceil(total of contig.sum / total of len(contig.nodelist)).
 An empty pool, or a pool whose contigs hold no nodes, used to raise
 ZeroDivisionError; it now yields 0, the same "no constraint" value the
 pruning functions in this file use when their node total is 0.
 """
 total=0
 nodes=0
 for c in contigpool:
  total+= c.sum
  nodes+= len(c.nodelist)
 if nodes == 0:
  return 0
 return int(math.ceil(float(total)/nodes))

def mergecontigp(ca,cb,direction):
 """Fold contig cb into contig ca and return ca.

 Every node of cb receives the contiginfo of the corresponding node of ca:
 same index when direction is true, mirrored index otherwise (in which
 case cb.sequence is first replaced by cb.reverse).  ca takes over cb's
 sequence when cb is the heavier of the two, and ca.weight becomes the sum
 of both weights.  The number of nodes visited is len(ca.nodelist).
 """
 kept=ca.nodelist
 absorbed=cb.nodelist
 count=len(kept)
 if not direction:
  cb.sequence=cb.reverse
 for position in range(count):
  if direction:
   source=kept[position]
  else:
   source=kept[count-1-position]
  absorbed[position].contiginfo=source.contiginfo
 if ca.weight < cb.weight:
  ca.sequence=cb.sequence
 ca.weight=ca.weight+cb.weight
 return ca

def turnnodeinfo(ca,cb,direction):
 """Copy the contiginfo of ca's nodes onto cb's nodes and return ca.

 Node i of cb receives the contiginfo of node i of ca when direction is
 true, and of node len(ca.nodelist)-1-i otherwise.  Nothing else on either
 contig is changed.
 """
 sources=ca.nodelist
 targets=cb.nodelist
 count=len(sources)
 for position in range(count):
  if direction:
   origin=sources[position]
  else:
   origin=sources[count-1-position]
  targets[position].contiginfo=origin.contiginfo
 return ca

def fastmergebubble_o(pool,constrain):
 """Split pool by how many sides are linked and merge bubbles per class.

 The three classes are: contigs linked on both sides, contigs linked on
 exactly one side, and contigs with no links.  Each class with more than
 one member is handed to mergebubble with the given constrain, in that
 order.  Returns None.
 """
 bothsides=[]
 oneside=[]
 isolated=[]
 for contig in pool:
  if len(contig.pre) and len(contig.next):
   bothsides.append(contig)
  elif len(contig.pre) or len(contig.next):
   oneside.append(contig)
  else:
   isolated.append(contig)
 for group in (bothsides, oneside, isolated):
  if len(group) >1:
   mergebubble(group,constrain)
 return

def fastmergebubble(pool,constrain):
 """Group contigs by their neighbour sets and merge bubbles per group.

 Only contigs linked on both sides are considered.  The grouping key is
 the pair (sorted tuple of .pre, sorted tuple of .next), with the two
 tuples themselves put in ascending order so a contig and its mirror image
 share a key.  Every group with more than one member is handed to
 mergebubble with the given constrain.  Returns None.
 """
 def ordered(links):
  # Order-independent tuple of a non-empty neighbour list.
  count=len(links)
  if count < 2:
   return (links[0],)
  if count == 2:
   if links[0] < links[1]:
    return (links[0], links[1])
   return (links[1], links[0])
  copy_=links[:]
  copy_.sort()
  return tuple(copy_)
 if len(pool) < 2:
  return
 groups={}
 for contig in pool:
  if not (len(contig.pre) and len(contig.next)):
   continue
  prekey=ordered(contig.pre)
  nextkey=ordered(contig.next)
  if prekey < nextkey:
   key=(prekey,nextkey)
  else:
   key=(nextkey,prekey)
  groups.setdefault(key,[]).append(contig)
 for key in groups:
  members=groups[key]
  if len(members) > 1:
   mergebubble(members,constrain)
 return

def mergebubble(pool,constrain):
 """Fold each contig of pool into the first later contig it matches.

 For every pool[i], the contigs pool[j] with j > i are tried in order with
 contigindistance.  On the first match, pool[j] absorbs pool[i]: .sum and
 .weight are added, item 0 of each node of pool[i] is added to the
 corresponding node of pool[j] (same index for a forward match, mirrored
 index for a reverse match) and then set to 0, and pool[i].fresh is set to
 False.  Works in place; returns None.
 """
 size=len(pool)
 for i in range(size-1):
  donor=pool[i]
  for j in range(i+1,size):
   orientation=contigindistance(donor,pool[j],constrain)
   if orientation is None:
    continue
   target=pool[j]
   target.sum+=donor.sum
   target.weight+=donor.weight
   donornodes=donor.nodelist
   targetnodes=target.nodelist
   count=len(donornodes)
   for step in range(count):
    if orientation:
     k=step
    else:
     # Reverse match: donor nodes are consumed from the far end.
     k=count-1-step
    targetnodes[step][0]+=donornodes[k][0]
    donornodes[k][0]= 0
   donor.fresh=False
   # A donor is merged at most once.
   break
 return
#    temppool[i].append((j,r))
#    temppool[j].append((i,r))
# np=[]
# for i in range(len(pool)):
#  nt=pool[i]
#  if nt.fresh:
#   nt.fresh=False
#   if len(temppool[i])==0:
#    continue
#   nt.reverse=True
#   np.append(nt)
#   nn=[nt]
#   ntp=[i]
#   while len(ntp):
#    j=ntp.pop(0)
#    for tt in temppool[j]:
#     (tt,r)=tt
#     tc=pool[tt]
#     if tc.fresh:
#      tc.fresh=False
#      nn.append(tc)
#      ntp.append(tt)
#      if pool[j].reverse == r:
#       tc.reverse=True
#       nt=turnnodeinfo(nt,tc,True)
#      else:
#       tc.sequence=tc.reverse
#       tc.reverse=False
#       nt=turnnodeinfo(nt,tc,False)
#   nt=mergecontigs(nn)
# return np

#def mergecontigs(nn):
# nt=nn[0]
# ln=len(nn)
# l=len(nt.sequence)
# rd=('C','G','T')
# nt.weight=sum([tt.weight for tt in nn])
# dl={'A':[0]*l,'C':[0]*l,'G':[0]*l,'T':[0]*l}
# for k in nn:
#  s=k.sequence
#  kmerweight=k.weight
#  for j in range(l):
#   dl[s[j]][j] +=kmerweight
# s=['A']*l
# for j in range(l):
#  for d in rd:
#   if dl[d][j] > dl[s[j]][j]:
#    s[j]=d
# nt.sequence= ''.join(s)
# nt.reverse=nt.getreverse()
# return nt

def setnodeinfo( contigpool):
 """Tag every node with its owning contig and its index in that contig.

 Each node object in contig.nodelist gets .contiginfo = (contig, index).
 Returns contigpool.
 """
 for contig in contigpool:
  for position, node in enumerate(contig.nodelist):
   node.contiginfo=(contig,position)
 return contigpool

def ssetnodeinfo( contigpool):
 """List-node variant of setnodeinfo.

 Each node in contig.nodelist is an indexable record; item 1 is set to the
 owning contig and item 2 to the node's index in that contig.  Item 0 is
 left alone.  Returns contigpool.
 """
 for contig in contigpool:
  for position, node in enumerate(contig.nodelist):
   node[1]=contig
   node[2]=position
 return contigpool

def getreverse( sequence):
 """Return the reverse complement of sequence, using the rdna lookup table."""
 complemented=[]
 for base in sequence[::-1]:
  complemented.append(rdna[base])
 return ''.join(complemented)

def materead_o(contigpool,nodepool,readpool):
 """Link and fuse contigs that are bridged by a single read (older variant).

 nodepool is walked two entries at a time; entries i and i+1 belong to
 read readpool[i/2] and their .contiginfo gives (contig, position).
 Contigs that already have links on both sides are set aside and appended
 unchanged to the result.  For the others, each read whose two nodes lie
 in different contigs is tested in four orientations; a link is recorded
 (replacing a previously recorded one) when the implied gap is smaller
 than the distances currently stored on both contigs and the overlapping
 contig ends agree.  Finally fusecontig (defined elsewhere in this file)
 is called on the linked contigs.

 Reads are expected to expose .kl and .sequence -- presumably the k-mer
 count and the base string of the read; TODO confirm against the read class.
 Returns the new contig pool.
 """
 contigpool = setnodeinfo( contigpool)
 # nodelength doubles as the "no link yet" sentinel distance below.
 nodelength=max([readtemp.kl for readtemp in readpool ])
 # Contigs linked on both sides are not candidates; keep them for the end.
 temppool= [contigtemp for contigtemp in contigpool if len(contigtemp.pre) and len(contigtemp.next) ]
 tempset = set(temppool)
 contigpool = [contigtemp for contigtemp in contigpool if contigtemp not in tempset]
 contigset= set(contigpool)
 for temp in contigpool:
  temp.fresh=True
  # Distance 1 marks a side that already has a link; nodelength marks a free side.
  if len(temp.next):
   temp.nextdistance=1
  else:
   temp.nextdistance=nodelength
  if len(temp.pre):
   temp.predistance=1
  else:
   temp.predistance=nodelength
 for i in range(0,len(nodepool),2):
  readtemp=readpool[i/2]
  (c1,cp1)=nodepool[i].contiginfo
  if c1 not in contigset:
   continue
  (c2,cp2)=nodepool[i+1].contiginfo
  if c2 not in contigset or c1 == c2:
   continue
  # cl1/cl2: number of nodes from the read's node to the end of its contig.
  cl1=len(c1.nodelist) -cp1
  cl2=len(c2.nodelist) -cp2
# next true
#  if cl1< nodelength:
#   if cp2 < nodelength:
#  if cl1+cp2 < nodelength:
  # Case 1: c1's next side joins c2's pre side (same orientation).
  temp=cl1+cp2
  if temp < readtemp.kl:
   # temp becomes the gap in nodes; otemp is the resulting end overlap in bases.
   temp = readtemp.kl-temp   
   otemp=k_mer-temp
   if (temp < c1.nextdistance) and (temp < c2.predistance) and ((otemp <=0) or c1.sequence[-otemp:] == c2.sequence[:otemp]):
    if c1.nextdistance == nodelength:
     c1.next.append(c2)
     c1.nextdirection.append(True)
    else:
     # c1 already had a next link: detach the old partner first.
     c1n=c1.next[0]
     if c1.nextdirection[0]:
#      if c1n.pre[0] ==c1:
      c1n.pre.pop()
      c1n.predirection.pop()
      c1n.predistance=nodelength
     else:
      c1n.next.pop()
      c1n.nextdirection.pop()
      c1n.nextdistance=nodelength
     c1.next[0] = c2
     c1.nextdirection[0] = True
    if c2.predistance == nodelength:
     c2.pre.append(c1)
     c2.predirection.append(True)
    else:
     # c2 already had a pre link: detach the old partner first.
     c2p=c2.pre[0]
     if c2.predirection[0]:
      c2p.next.pop()
      c2p.nextdirection.pop()
      c2p.nextdistance=nodelength
     else:
      c2p.pre.pop()
      c2p.predirection.pop()
      c2p.predistance=nodelength
     c2.pre[0]=c1
     c2.predirection[0]=True
    c1.nextdistance=temp
    c2.predistance=temp
    # Bases of the read that fall between the two contigs.
    c1.nextsequence=readtemp.sequence[k_mer+cl1-1:temp+cl1-1]
    c2.presequence = c1.nextsequence[:]
# next False
#  if cl1+cl2 -1 < nodelength:
  # Case 2: c1's next side joins c2's next side (c2 reversed).
  temp=cl1+cl2 -1
  if temp < readtemp.kl:
   temp = readtemp.kl-temp   
   otemp=k_mer-temp
   if (temp < c1.nextdistance) and (temp < c2.nextdistance) and ((otemp <=0) or c1.sequence[-otemp:] == c2.reverse[:otemp]):
    if c1.nextdistance == nodelength:
     c1.next.append(c2)
     c1.nextdirection.append(False)
    else:
     c1n=c1.next[0]
     if c1.nextdirection[0]:
#      if c1n.pre[0] ==c1:
      c1n.pre.pop()
      c1n.predirection.pop()
      c1n.predistance=nodelength
     else:
      c1n.next.pop()
      c1n.nextdirection.pop()
      c1n.nextdistance=nodelength
     c1.next[0] = c2
     c1.nextdirection[0] = False
    if c2.nextdistance == nodelength:
     c2.next.append(c1)
     c2.nextdirection.append(False)
    else:
     c2n=c2.next[0]
     if c2.nextdirection[0]:
      c2n.pre.pop()
      c2n.predirection.pop()
      c2n.predistance=nodelength
     else:
      c2n.next.pop()
      c2n.nextdirection.pop()
      c2n.nextdistance=nodelength
     c2.next[0]=c1
     c2.nextdirection[0]=False
    c1.nextdistance=temp
    c2.nextdistance=temp
    c1.nextsequence=readtemp.sequence[k_mer+cl1-1:temp+cl1-1]
    c2.nextsequence = getreverse(c1.nextsequence)
# pre True
#  if cp1+cl2 < nodelength:
  # Case 3: c1's pre side joins c2's next side (same orientation).
  temp=cp1+cl2 
  if temp < readtemp.kl:
   temp = readtemp.kl-temp   
   otemp=k_mer-temp
   if (temp < c1.predistance) and (temp < c2.nextdistance) and ((otemp <=0) or c1.sequence[:otemp] == c2.sequence[-otemp:]):
    if c1.predistance == nodelength:
     c1.pre.append(c2)
     c1.predirection.append(True)
    else:
     c1p=c1.pre[0]
     if c1.predirection[0]:
#      if c1p.next[0] ==c1:
      c1p.next.pop()
      c1p.nextdirection.pop()
      c1p.nextdistance=nodelength
     else:
      c1p.pre.pop()
      c1p.predirection.pop()
      c1p.predistance=nodelength
     c1.pre[0] = c2
     c1.predirection[0] = True
    if c2.nextdistance == nodelength:
     c2.next.append(c1)
     c2.nextdirection.append(True)
    else:
     c2n=c2.next[0]
     if c2.nextdirection[0]:
      c2n.pre.pop()
      c2n.predirection.pop()
      c2n.predistance=nodelength
     else:
      c2n.next.pop()
      c2n.nextdirection.pop()
      c2n.nextdistance=nodelength
     c2.next[0]=c1
     c2.nextdirection[0]=True
    c1.predistance=temp
    c2.nextdistance=temp
    c2.nextsequence = getreverse(readtemp.sequence)[k_mer+cl2-1:temp+cl2-1]
    c1.presequence=c2.nextsequence[:]
# pre False
#  if cp1+cp2 +1< nodelength:
  # Case 4: c1's pre side joins c2's pre side (c2 reversed).
  temp=cp1+cp2 +1
  if temp < readtemp.kl:
   temp = readtemp.kl-temp   
   otemp=k_mer-temp
   if (temp < c1.predistance) and (temp < c2.predistance) and ((otemp <=0) or c1.sequence[:otemp] == c2.reverse[-otemp:]):
    if c1.predistance == nodelength:
     c1.pre.append(c2)
     c1.predirection.append(False)
    else:
     c1p=c1.pre[0]
     if c1.predirection[0]:
#      if c1p.next[0] ==c1:
      c1p.next.pop()
      c1p.nextdirection.pop()
      c1p.nextdistance=nodelength
     else:
      c1p.pre.pop()
      c1p.predirection.pop()
      c1p.predistance=nodelength
     c1.pre[0] = c2
     c1.predirection[0] = False
    if c2.predistance == nodelength:
     c2.pre.append(c1)
     c2.predirection.append(False)
    else:
     c2p=c2.pre[0]
     if c2.predirection[0]:
      c2p.next.pop()
      c2p.nextdirection.pop()
      c2p.nextdistance=nodelength
     else:
      c2p.pre.pop()
      c2p.predirection.pop()
      c2p.predistance=nodelength
     c2.pre[0]=c1
     c2.predirection[0]=False
    c1.predistance=temp
    c2.predistance=temp
    c2.presequence = readtemp.sequence[-temp-cp2:-k_mer-cp2]
    c1.presequence= getreverse(c2.presequence)
# fusecontig
 contigpool = fusecontig(contigpool, nodelength)
 # Put back the contigs that were set aside at the start.
 contigpool.extend(temppool)
 return contigpool

def materead(contigpool,nodepool,readpool):
 """Rebuild contig links from reads that span two contigs.

 All existing links of the contigs in contigpool are discarded.  nodepool
 is walked two entries at a time; entries i and i+1 are 3-item records
 (value, contig, position) belonging to read readpool[i/2], and reads are
 used as plain sequences (len() and slicing).  For each of four
 orientations in which the read could bridge its two contigs, the gap and
 the bridging read bases are appended to the per-link distance list, a new
 link being created on both contigs the first time a (contig, direction)
 pair is seen.  The pool is then passed through setmatelink and matecontig
 (both defined elsewhere in this file) and returned.
 """
 contigset= set(contigpool)
 kmm=k_mer-1
 for temp in contigpool:
  temp.fresh=True
  # Start from an empty link set; *distance lists run parallel to pre/next.
  temp.pre=[]
  temp.predirection=[]
  temp.next=[]
  temp.nextdirection=[]
  temp.nextdistance=[]
  temp.predistance=[]
 for i in range(0,len(nodepool),2):
  readtemp=readpool[i/2]
  (temp,c1,cp1)=nodepool[i]
  (temp,c2,cp2)=nodepool[i+1]
  if (c1 not in contigset) or (c2 not in contigset):
   continue
  # cl1/cl2: number of nodes from the read's node to the end of its contig.
  cl1=len(c1.nodelist) -cp1
  cl2=len(c2.nodelist) -cp2
  # NOTE(review): in each case below a failed overlap check `continue`s to
  # the next read pair, so the remaining orientations are not tried for
  # this read -- confirm that this is intended.
# next true
#  if cl1< nodelength:
#   if cp2 < nodelength:
#  if cl1+cp2 < nodelength:
  # Case 1: c1's next side joins c2's pre side (same orientation).
  temp=cl1+cp2
  if temp < len(readtemp)-kmm:
   # temp becomes the gap in nodes; otemp is the resulting end overlap in bases.
   temp = len(readtemp)-kmm-temp   
   otemp=k_mer-temp
   if (otemp >0) and c1.sequence[-otemp:] != c2.sequence[:otemp]:
    continue
   d=findpair(c1.next,c1.nextdirection,c2,True)
   tempsequence=readtemp[cl1+kmm:-(cp2+k_mer)]
   if d== -1:
    # First evidence for this link: create it on both contigs.
    c1.next.append(c2)
    c1.nextdirection.append(True)
    c1.nextdistance.append([[temp,tempsequence]])
    c2.pre.append(c1)
    c2.predirection.append(True)
    c2.predistance.append([[temp,tempsequence]])
   else:
    # Link already known: add this read as further evidence.
    c1.nextdistance[d].append([temp,tempsequence])
    d=findpair(c2.pre,c2.predirection,c1,True)
    c2.predistance[d].append([temp,tempsequence])
# next False
#  if cl1+cl2 -1 < nodelength:
  # Case 2: c1's next side joins c2's next side (c2 reversed); skipped when
  # both nodes are in the same contig.
  temp=cl1+cl2 -1
  if temp < len(readtemp)-kmm and (c1 is not c2):
   temp = len(readtemp)-kmm-temp   
   otemp=k_mer-temp
   if (otemp >0) and c1.sequence[-otemp:] != c2.reverse[:otemp]:
    continue
   d=findpair(c1.next,c1.nextdirection,c2,False)
   tempsequence=readtemp[cl1+kmm:-(cl2+kmm)]
   if d== -1:
    c1.next.append(c2)
    c1.nextdirection.append(False)
    c1.nextdistance.append([[temp,tempsequence]])
    c2.next.append(c1)
    c2.nextdirection.append(False)
    c2.nextdistance.append([[temp,getreverse(tempsequence)]])
   else:
    c1.nextdistance[d].append([temp,tempsequence])
    d=findpair(c2.next,c2.nextdirection,c1,False)
    c2.nextdistance[d].append([temp,getreverse(tempsequence)])
# pre True
#  if cp1+cl2 < nodelength:
  # Case 3: c1's pre side joins c2's next side (same orientation).
  temp=cp1+cl2 
  if temp < len(readtemp)-kmm:
   temp = len(readtemp)-kmm-temp   
   otemp=k_mer-temp
   if (otemp >0) and c1.sequence[:otemp] != c2.sequence[-otemp:]:
    continue
   d=findpair(c1.pre,c1.predirection,c2,True)
   tempsequence=getreverse(readtemp[cp1+k_mer:-(cl2+kmm)])
   if d== -1:
    c1.pre.append(c2)
    c1.predirection.append(True)
    c1.predistance.append([[temp,tempsequence]])
    c2.next.append(c1)
    c2.nextdirection.append(True)
    c2.nextdistance.append([[temp,tempsequence]])
   else:
    c1.predistance[d].append([temp,tempsequence])
    d=findpair(c2.next,c2.nextdirection,c1,True)
    c2.nextdistance[d].append([temp,tempsequence])
# pre False
#  if cp1+cp2 +1< nodelength:
  # Case 4: c1's pre side joins c2's pre side (c2 reversed); skipped when
  # both nodes are in the same contig.
  temp=cp1+cp2 +1
  if temp < len(readtemp)-kmm and (c1 is not c2):
   temp = len(readtemp)-kmm-temp   
   otemp=k_mer-temp
   if (otemp >0) and c1.sequence[:otemp] != c2.reverse[-otemp:]:
    continue
   d=findpair(c1.pre,c1.predirection,c2,False)
   tempsequence=readtemp[cp1+k_mer:-(cp2+k_mer)]
   if d== -1:
    c1.pre.append(c2)
    c1.predirection.append(False)
    c1.predistance.append([[temp,getreverse(tempsequence)]])
    c2.pre.append(c1)
    c2.predirection.append(False)
    c2.predistance.append([[temp,tempsequence]])
   else:
    c1.predistance[d].append([temp,getreverse(tempsequence)])
    d=findpair(c2.pre,c2.predirection,c1,False)
    c2.predistance[d].append([temp,tempsequence])
 contigpool=setmatelink(contigpool)
# contigpool = [removetransitivemethod(contigtemp) for contigtemp in contigpool]
 contigpool=matecontig(contigpool)
 return contigpool

def smateread_o(contigpool,nodepool,readpool):
 """Link and fuse contigs bridged by a single read (list-node variant).

 Same procedure as materead_o, but nodes are indexable records whose items
 1 and 2 hold the contig and the position (as written by ssetnodeinfo),
 and reads are used as plain sequences (len() and slicing).  nodepool is
 walked two entries at a time; entries i and i+1 belong to read
 readpool[i/2].  Contigs already linked on both sides are set aside and
 appended unchanged to the result.  For the others, each read whose two
 nodes lie in different contigs is tested in four orientations; a link is
 recorded (replacing a previously recorded one) when the implied gap is
 smaller than the distances currently stored on both contigs and the
 overlapping contig ends agree.  Finally sfusecontig is called on the
 linked contigs.  Returns the new contig pool.
 """
 kmm=k_mer-1
 contigpool = ssetnodeinfo( contigpool)
 # nodelength (longest read) doubles as the "no link yet" sentinel distance.
 nodelength=max([len(readtemp) for readtemp in readpool ])
 # Contigs linked on both sides are not candidates; keep them for the end.
 temppool= [contigtemp for contigtemp in contigpool if len(contigtemp.pre) and len(contigtemp.next) ]
 tempset = set(temppool)
 contigpool = [contigtemp for contigtemp in contigpool if contigtemp not in tempset]
 contigset= set(contigpool)
 for temp in contigpool:
  temp.fresh=True
  # Distance 1 marks a side that already has a link; nodelength marks a free side.
  if len(temp.next):
   temp.nextdistance=1
  else:
   temp.nextdistance=nodelength
  if len(temp.pre):
   temp.predistance=1
  else:
   temp.predistance=nodelength
 for i in range(0,len(nodepool),2):
  readtemp=readpool[i/2]
  c1=nodepool[i][1]
  cp1=nodepool[i][2]
  if c1 not in contigset:
   continue
  c2=nodepool[i+1][1]
  cp2=nodepool[i+1][2]
  if c2 not in contigset or c1 == c2:
   continue
  # cl1/cl2: number of nodes from the read's node to the end of its contig.
  cl1=len(c1.nodelist) -cp1
  cl2=len(c2.nodelist) -cp2
# next true
#  if cl1< nodelength:
#   if cp2 < nodelength:
#  if cl1+cp2 < nodelength:
  # Case 1: c1's next side joins c2's pre side (same orientation).
  temp=cl1+cp2
  if temp < len(readtemp)-kmm:
   # temp becomes the gap in nodes; otemp is the resulting end overlap in bases.
   temp = len(readtemp)-kmm-temp   
   otemp=k_mer-temp
   if (temp < c1.nextdistance) and (temp < c2.predistance) and ((otemp <=0) or c1.sequence[-otemp:] == c2.sequence[:otemp]):
    if c1.nextdistance == nodelength:
     c1.next.append(c2)
     c1.nextdirection.append(True)
    else:
     # c1 already had a next link: detach the old partner first.
     c1n=c1.next[0]
     if c1.nextdirection[0]:
#      if c1n.pre[0] ==c1:
      c1n.pre.pop()
      c1n.predirection.pop()
      c1n.predistance=nodelength
     else:
      c1n.next.pop()
      c1n.nextdirection.pop()
      c1n.nextdistance=nodelength
     c1.next[0] = c2
     c1.nextdirection[0] = True
    if c2.predistance == nodelength:
     c2.pre.append(c1)
     c2.predirection.append(True)
    else:
     # c2 already had a pre link: detach the old partner first.
     c2p=c2.pre[0]
     if c2.predirection[0]:
      c2p.next.pop()
      c2p.nextdirection.pop()
      c2p.nextdistance=nodelength
     else:
      c2p.pre.pop()
      c2p.predirection.pop()
      c2p.predistance=nodelength
     c2.pre[0]=c1
     c2.predirection[0]=True
    c1.nextdistance=temp
    c2.predistance=temp
    # Bases of the read that fall between the two contigs.
    c1.nextsequence=readtemp[k_mer+cl1-1:temp+cl1-1]
    c2.presequence = c1.nextsequence[:]
# next False
#  if cl1+cl2 -1 < nodelength:
  # Case 2: c1's next side joins c2's next side (c2 reversed).
  temp=cl1+cl2 -1
  if temp < len(readtemp)-kmm:
   temp = len(readtemp)-kmm-temp   
   otemp=k_mer-temp
   if (temp < c1.nextdistance) and (temp < c2.nextdistance) and ((otemp <=0) or c1.sequence[-otemp:] == c2.reverse[:otemp]):
    if c1.nextdistance == nodelength:
     c1.next.append(c2)
     c1.nextdirection.append(False)
    else:
     c1n=c1.next[0]
     if c1.nextdirection[0]:
#      if c1n.pre[0] ==c1:
      c1n.pre.pop()
      c1n.predirection.pop()
      c1n.predistance=nodelength
     else:
      c1n.next.pop()
      c1n.nextdirection.pop()
      c1n.nextdistance=nodelength
     c1.next[0] = c2
     c1.nextdirection[0] = False
    if c2.nextdistance == nodelength:
     c2.next.append(c1)
     c2.nextdirection.append(False)
    else:
     c2n=c2.next[0]
     if c2.nextdirection[0]:
      c2n.pre.pop()
      c2n.predirection.pop()
      c2n.predistance=nodelength
     else:
      c2n.next.pop()
      c2n.nextdirection.pop()
      c2n.nextdistance=nodelength
     c2.next[0]=c1
     c2.nextdirection[0]=False
    c1.nextdistance=temp
    c2.nextdistance=temp
    c1.nextsequence=readtemp[k_mer+cl1-1:temp+cl1-1]
    c2.nextsequence = getreverse(c1.nextsequence)
# pre True
#  if cp1+cl2 < nodelength:
  # Case 3: c1's pre side joins c2's next side (same orientation).
  temp=cp1+cl2 
  if temp < len(readtemp)-kmm:
   temp = len(readtemp)-kmm-temp   
   otemp=k_mer-temp
   if (temp < c1.predistance) and (temp < c2.nextdistance) and ((otemp <=0) or c1.sequence[:otemp] == c2.sequence[-otemp:]):
    if c1.predistance == nodelength:
     c1.pre.append(c2)
     c1.predirection.append(True)
    else:
     c1p=c1.pre[0]
     if c1.predirection[0]:
#      if c1p.next[0] ==c1:
      c1p.next.pop()
      c1p.nextdirection.pop()
      c1p.nextdistance=nodelength
     else:
      c1p.pre.pop()
      c1p.predirection.pop()
      c1p.predistance=nodelength
     c1.pre[0] = c2
     c1.predirection[0] = True
    if c2.nextdistance == nodelength:
     c2.next.append(c1)
     c2.nextdirection.append(True)
    else:
     c2n=c2.next[0]
     if c2.nextdirection[0]:
      c2n.pre.pop()
      c2n.predirection.pop()
      c2n.predistance=nodelength
     else:
      c2n.next.pop()
      c2n.nextdirection.pop()
      c2n.nextdistance=nodelength
     c2.next[0]=c1
     c2.nextdirection[0]=True
    c1.predistance=temp
    c2.nextdistance=temp
    c2.nextsequence = getreverse(readtemp)[k_mer+cl2-1:temp+cl2-1]
    c1.presequence=c2.nextsequence[:]
# pre False
#  if cp1+cp2 +1< nodelength:
  # Case 4: c1's pre side joins c2's pre side (c2 reversed).
  temp=cp1+cp2 +1
  if temp < len(readtemp)-kmm:
   temp = len(readtemp)-kmm-temp   
   otemp=k_mer-temp
   if (temp < c1.predistance) and (temp < c2.predistance) and ((otemp <=0) or c1.sequence[:otemp] == c2.reverse[-otemp:]):
    if c1.predistance == nodelength:
     c1.pre.append(c2)
     c1.predirection.append(False)
    else:
     c1p=c1.pre[0]
     if c1.predirection[0]:
#      if c1p.next[0] ==c1:
      c1p.next.pop()
      c1p.nextdirection.pop()
      c1p.nextdistance=nodelength
     else:
      c1p.pre.pop()
      c1p.predirection.pop()
      c1p.predistance=nodelength
     c1.pre[0] = c2
     c1.predirection[0] = False
    if c2.predistance == nodelength:
     c2.pre.append(c1)
     c2.predirection.append(False)
    else:
     c2p=c2.pre[0]
     if c2.predirection[0]:
      c2p.next.pop()
      c2p.nextdirection.pop()
      c2p.nextdistance=nodelength
     else:
      c2p.pre.pop()
      c2p.predirection.pop()
      c2p.predistance=nodelength
     c2.pre[0]=c1
     c2.predirection[0]=False
    c1.predistance=temp
    c2.predistance=temp
    c2.presequence = readtemp[-temp-cp2:-k_mer-cp2]
    c1.presequence= getreverse(c2.presequence)
# fusecontig
 contigpool = sfusecontig(contigpool, nodelength)
 # Put back the contigs that were set aside at the start.
 contigpool.extend(temppool)
 return contigpool

def sfusecontig(contigpool, maxlength):
 """Fuse chains of linked contigs into single contigs.

 Every contig of contigpool that is still flagged ``fresh`` seeds a walk:
 first along its next side, then along its pre side, driven by the
 contig's own ``extendnext`` / ``extendpre`` methods.  Contigs reached
 during a walk are flagged not fresh, so they are neither walked nor
 returned later.  When a link distance exceeds k_mer, the surplus is
 filled with ``[0,None,None]`` placeholder entries in ``nodelist`` and the
 stored bridging sequence is spliced into ``sequence`` / ``reverse``.

 contigpool -- iterable of contig objects (reads/writes fresh,
               nextdistance, predistance, nextsequence, presequence,
               nodelist, sequence, reverse, sum, weight).
 maxlength  -- distance value that marks "no link"; a side whose distance
               equals it is not extended.

 Returns the list of seed contigs, each with ``weight`` recomputed as
 sum / (number of k-mer positions in the fused sequence).
 """
# nn= ''.join(['N']*maxlength)
 pool=[]
 for   contigtemp in contigpool:
  # Already absorbed by an earlier walk.
  if not contigtemp.fresh:
   continue
  # ---- extend along the next side ----
  # NOTE(review): a distance of exactly 1 is skipped as well; the meaning
  # of that value is not visible in this function.
  if contigtemp.nextdistance != 1 and contigtemp.nextdistance != maxlength:
   # Surplus of the link distance over k_mer.
   temp = contigtemp.nextdistance- k_mer
   if temp >0:
    # Clamp the distance to k_mer and materialise the gap: one placeholder
    # node per surplus position plus the bridging bases on both strands.
    contigtemp.nextdistance=k_mer
    for i in range(temp):
     contigtemp.nodelist.append( [0,None,None])
    contigtemp.sequence = contigtemp.sequence + contigtemp.nextsequence
    contigtemp.reverse = getreverse(contigtemp.nextsequence) + contigtemp.reverse
   # From here on temp holds the result of extendnext: it is unpacked as
   # (contig, samedirection), or is None when the walk ends.
   # Presumably extendnext merges the neighbour into contigtemp -- TODO
   # confirm against the contig class.
   temp=contigtemp.extendnext(contigtemp,True,contigtemp.nextdistance)
   while temp is not None:
    (current,samedirection)=temp
    current.fresh=False
    # The onward link of current lies on its next side when it is oriented
    # like the walk, otherwise on its pre side.
    if samedirection:
     distance=current.nextdistance
    else:
     distance=current.predistance
    if distance == maxlength:
     break
    # temp is briefly an int again (gap surplus) before being reassigned
    # to the extendnext result below.
    temp = distance- k_mer
    if temp >0:
     distance=k_mer
     for i in range(temp):
      contigtemp.nodelist.append( [0,None,None])
     if samedirection:
      contigtemp.sequence = contigtemp.sequence + current.nextsequence
      contigtemp.reverse = getreverse(current.nextsequence) + contigtemp.reverse
     else:
      contigtemp.sequence = contigtemp.sequence + getreverse(current.presequence)
      contigtemp.reverse = current.presequence + contigtemp.reverse
    temp=contigtemp.extendnext(current,samedirection, distance)
  # ---- extend along the pre side (mirror image of the block above) ----
  if contigtemp.predistance != 1 and contigtemp.predistance != maxlength:
   temp = contigtemp.predistance- k_mer
   if temp >0:
    contigtemp.predistance=k_mer
    # Placeholders and bridging bases are prepended on this side.
    for i in range(temp):
     contigtemp.nodelist.insert(0, [0,None,None])
    contigtemp.sequence = contigtemp.presequence + contigtemp.sequence
    contigtemp.reverse = contigtemp.reverse + getreverse(contigtemp.presequence)
   temp=contigtemp.extendpre(contigtemp,True,contigtemp.predistance)
   while temp is not None:
    (current,samedirection)=temp
    current.fresh=False
    if samedirection:
     distance=current.predistance
    else:
     distance=current.nextdistance
    if distance == maxlength:
     break
    temp = distance- k_mer
    if temp >0:
     distance=k_mer
     for i in range(temp):
      contigtemp.nodelist.insert(0, [0,None,None])
     if samedirection:
      contigtemp.sequence = current.presequence + contigtemp.sequence
      contigtemp.reverse = contigtemp.reverse + getreverse(current.presequence)
     else:
      contigtemp.sequence = getreverse(current.nextsequence) + contigtemp.sequence
      contigtemp.reverse = contigtemp.reverse + current.nextsequence
    temp=contigtemp.extendpre(current,samedirection, distance)
  contigtemp.fresh=False
  # Average over the k-mer positions of the (possibly extended) sequence.
  contigtemp.weight=contigtemp.sum/float(len(contigtemp.sequence)+1-k_mer)
  pool.append(contigtemp)
 return pool

def fusecontig(contigpool, maxlength):
 """Fuse chains of linked contigs into single contigs, padding with node().

 Every contig of contigpool that is still flagged ``fresh`` seeds a walk:
 first along its next side, then along its pre side, driven by the
 contig's own ``extendnext`` / ``extendpre`` methods.  Contigs reached
 during a walk are flagged not fresh, so they are neither walked nor
 returned later.  When a link distance exceeds k_mer, the surplus is
 filled with freshly created ``node()`` objects in ``nodelist`` and the
 stored bridging sequence is spliced into ``sequence`` / ``reverse``.

 contigpool -- iterable of contig objects (reads/writes fresh,
               nextdistance, predistance, nextsequence, presequence,
               nodelist, sequence, reverse, sum, weight).
 maxlength  -- distance value that marks "no link"; a side whose distance
               equals it is not extended.

 Returns the list of seed contigs, each with ``weight`` recomputed as
 sum / (number of k-mer positions in the fused sequence).
 """
# nn= ''.join(['N']*maxlength)
 pool=[]
 for   contigtemp in contigpool:
  # Already absorbed by an earlier walk.
  if not contigtemp.fresh:
   continue
  # ---- extend along the next side ----
  # NOTE(review): a distance of exactly 1 is skipped as well; the meaning
  # of that value is not visible in this function.
  if contigtemp.nextdistance != 1 and contigtemp.nextdistance != maxlength:
   # Surplus of the link distance over k_mer.
   temp = contigtemp.nextdistance- k_mer
   if temp >0:
    # Clamp the distance to k_mer and materialise the gap: one node() per
    # surplus position plus the bridging bases on both strands.
    contigtemp.nextdistance=k_mer
    for i in range(temp):
     contigtemp.nodelist.append( node())
    contigtemp.sequence = contigtemp.sequence + contigtemp.nextsequence
    contigtemp.reverse = getreverse(contigtemp.nextsequence) + contigtemp.reverse
   # From here on temp holds the result of extendnext: it is unpacked as
   # (contig, samedirection), or is None when the walk ends.
   # Presumably extendnext merges the neighbour into contigtemp -- TODO
   # confirm against the contig class.
   temp=contigtemp.extendnext(contigtemp,True,contigtemp.nextdistance)
   while temp is not None:
    (current,samedirection)=temp
    current.fresh=False
    # The onward link of current lies on its next side when it is oriented
    # like the walk, otherwise on its pre side.
    if samedirection:
     distance=current.nextdistance
    else:
     distance=current.predistance
    if distance == maxlength:
     break
    # temp is briefly an int again (gap surplus) before being reassigned
    # to the extendnext result below.
    temp = distance- k_mer
    if temp >0:
     distance=k_mer
     for i in range(temp):
      contigtemp.nodelist.append( node())
     if samedirection:
      contigtemp.sequence = contigtemp.sequence + current.nextsequence
      contigtemp.reverse = getreverse(current.nextsequence) + contigtemp.reverse
     else:
      contigtemp.sequence = contigtemp.sequence + getreverse(current.presequence)
      contigtemp.reverse = current.presequence + contigtemp.reverse
    temp=contigtemp.extendnext(current,samedirection, distance)
  # ---- extend along the pre side (mirror image of the block above) ----
  if contigtemp.predistance != 1 and contigtemp.predistance != maxlength:
   temp = contigtemp.predistance- k_mer
   if temp >0:
    contigtemp.predistance=k_mer
    # Placeholders and bridging bases are prepended on this side.
    for i in range(temp):
     contigtemp.nodelist.insert(0, node())
    contigtemp.sequence = contigtemp.presequence + contigtemp.sequence
    contigtemp.reverse = contigtemp.reverse + getreverse(contigtemp.presequence)
   temp=contigtemp.extendpre(contigtemp,True,contigtemp.predistance)
   while temp is not None:
    (current,samedirection)=temp
    current.fresh=False
    if samedirection:
     distance=current.predistance
    else:
     distance=current.nextdistance
    if distance == maxlength:
     break
    temp = distance- k_mer
    if temp >0:
     distance=k_mer
     for i in range(temp):
      contigtemp.nodelist.insert(0, node())
     if samedirection:
      contigtemp.sequence = current.presequence + contigtemp.sequence
      contigtemp.reverse = contigtemp.reverse + getreverse(current.presequence)
     else:
      contigtemp.sequence = getreverse(current.nextsequence) + contigtemp.sequence
      contigtemp.reverse = contigtemp.reverse + current.nextsequence
    temp=contigtemp.extendpre(current,samedirection, distance)
  contigtemp.fresh=False
  # Average over the k-mer positions of the (possibly extended) sequence.
  contigtemp.weight=contigtemp.sum/float(len(contigtemp.sequence)+1-k_mer)
  pool.append(contigtemp)
 return pool

def matepseudo(contigpool,nodepool,readpool, nodelength=100):
 """Link pairs of contigs bridged by a single read, then fuse the chains.

 nodepool is consumed two entries at a time: nodepool[i] and nodepool[i+1]
 are the two anchors belonging to read readpool[i/2] (Python 2 integer
 division).  Each anchor's ``contiginfo`` is unpacked as
 (contig, position, direction).  Pairs whose contigs are not both in
 contigpool, or that sit on the same contig, are ignored.

 For every remaining pair one of four cases applies, chosen by the two
 direction flags.  A case computes the distance ``temp`` implied by the
 read between the two contig ends and installs the link only if
   * temp is smaller than the distance already stored on both ends,
   * the contig ends agree where they would overlap (otemp > 0), and
   * both contigs agree with the read according to ``check``.
 Installing a link first detaches whatever partner either end had before,
 so each end carries at most one link.

 contigpool -- list of contig objects; every contig is reset to
               fresh=True with both distances set to nodelength.
 nodepool   -- sequence of objects exposing ``contiginfo``.
 readpool   -- sequence of read objects exposing ``sequence`` and ``kl``
               (presumably the read's number of k-mer positions --
               TODO confirm).
 nodelength -- sentinel distance meaning "no link"; capped at
               c_number+k_mer-1 when metric is 'unconditional'.

 Returns the pool produced by fusecontig(contigpool, nodelength).
 """
 check = kpgraph.aligndistance6
 # Choose the read-versus-contig agreement test from the global metric.
 if metric == 'approximate':
#  check = kpgraph.indistance
  check = withindistance
 if metric == 'alignment':
  check = kpgraph.aligndistance6
 if metric == 'unconditional':
  check = distancetrue
  temp=c_number+k_mer-1
  if nodelength > temp:
   nodelength=temp
 contigset= set(contigpool)
 # Reset link state: distance == nodelength means "this end is unlinked".
 for temp in contigpool:
  temp.fresh=True
  temp.nextdistance=nodelength
  temp.predistance=nodelength
 for i in range(0,len(nodepool),2):
  readtemp=readpool[i/2]
  (c1,cp1,cd1)=nodepool[i].contiginfo
  if c1 not in contigset:
   continue
  (c2,cp2,cd2)=nodepool[i+1].contiginfo
  if c2 not in contigset or c1 == c2:
   continue
  # Number of k-mer positions from the anchor position to the contig end.
  cl1=len(c1.sequence) +1 - k_mer -cp1
  cl2=len(c2.sequence) +1 - k_mer -cp2
# next true
#  if cl1< nodelength:
#   if cp2 < nodelength:
#  if cl1+cp2 < nodelength:
  # Case 1: both flags are 1 -- the next end of c1 is joined to the pre
  # end of c2, both compared through their ``sequence``.
  if (cd1==1) and (cd2==1):
   temp=cl1+cp2
   if temp >= readtemp.kl:
    continue
   # Distance between the two contig ends implied by the read; otemp > 0
   # is the number of bases by which the contig ends would overlap.
   temp = readtemp.kl-temp   
   otemp=k_mer-temp
   if (temp < c1.nextdistance) and (temp < c2.predistance) and ((otemp <=0) or c1.sequence[-otemp:] == c2.sequence[:otemp]):
    # Lengths of the contig stretches that the read covers on each side.
    poltemp=len(c1.sequence)-cp1
    noltemp=cp2+k_mer
#    if (not kpgraph.indistance(c1.sequence[cp1:], readpool[i/2].sequence[:poltemp],poltemp/m_mer)) or (not kpgraph.indistance(c2.sequence[:noltemp],readpool[i/2].sequence[-noltemp:],noltemp/m_mer)):
#    if (not kpgraph.aligndistance6(c1.sequence[cp1:], readpool[i/2].sequence[:poltemp],poltemp/m_mer)) or (not kpgraph.aligndistance6(c2.sequence[:noltemp],readpool[i/2].sequence[-noltemp:],noltemp/m_mer)):
    # Reject the pair unless the read start matches c1 and the read end
    # matches c2 within a budget of length/m_mer.
    if (not check(c1.sequence[cp1:], readpool[i/2].sequence[:poltemp],poltemp/m_mer)) or (not check(c2.sequence[:noltemp],readpool[i/2].sequence[-noltemp:],noltemp/m_mer)):
     continue
    # Attach c2 on c1's next end; if that end was linked already, detach
    # the old partner's back-link first and reuse slot 0.
    if c1.nextdistance == nodelength:
     c1.next.append(c2)
     c1.nextdirection.append(True)
    else:
     c1n=c1.next[0]
     if c1.nextdirection[0]:
#      if c1n.pre[0] ==c1:
      c1n.pre.pop()
      c1n.predirection.pop()
      c1n.predistance=nodelength
     else:
      c1n.next.pop()
      c1n.nextdirection.pop()
      c1n.nextdistance=nodelength
     c1.next[0] = c2
     c1.nextdirection[0] = True
    # Same for c2's pre end.
    if c2.predistance == nodelength:
     c2.pre.append(c1)
     c2.predirection.append(True)
    else:
     c2p=c2.pre[0]
     if c2.predirection[0]:
      c2p.next.pop()
      c2p.nextdirection.pop()
      c2p.nextdistance=nodelength
     else:
      c2p.pre.pop()
      c2p.predirection.pop()
      c2p.predistance=nodelength
     c2.pre[0]=c1
     c2.predirection[0]=True
    c1.nextdistance=temp
    c2.predistance=temp
    # Bases of the read that bridge the two contigs (empty when the
    # distance does not exceed k_mer).
    c1.nextsequence=readtemp.sequence[k_mer+cl1-1:temp+cl1-1]
    c2.presequence = c1.nextsequence[:]
# next False
#  if cl1+cl2 -1 < nodelength:
  # Case 2: flags (1, 0) -- the next end of c1 is joined to the next end
  # of c2; c2 is compared through its ``reverse`` strand.
  if (cd1==1) and (cd2==0):
   temp=cl1+cl2 -1
   if temp >= readtemp.kl:
    continue
   temp = readtemp.kl-temp   
   otemp=k_mer-temp
   if (temp < c1.nextdistance) and (temp < c2.nextdistance) and ((otemp <=0) or c1.sequence[-otemp:] == c2.reverse[:otemp]):
    poltemp=len(c1.sequence)-cp1
    noltemp=len(c2.sequence)-cp2
#    if (not kpgraph.indistance(c1.sequence[cp1:], readpool[i/2].sequence[:poltemp],poltemp/m_mer)) or (not kpgraph.indistance(c2.reverse[:noltemp],readpool[i/2].sequence[-noltemp:],noltemp/m_mer)):
    if (not check(c1.sequence[cp1:], readpool[i/2].sequence[:poltemp],poltemp/m_mer)) or (not check(c2.reverse[:noltemp],readpool[i/2].sequence[-noltemp:],noltemp/m_mer)):
     continue
    if c1.nextdistance == nodelength:
     c1.next.append(c2)
     c1.nextdirection.append(False)
    else:
     c1n=c1.next[0]
     if c1.nextdirection[0]:
#      if c1n.pre[0] ==c1:
      c1n.pre.pop()
      c1n.predirection.pop()
      c1n.predistance=nodelength
     else:
      c1n.next.pop()
      c1n.nextdirection.pop()
      c1n.nextdistance=nodelength
     c1.next[0] = c2
     c1.nextdirection[0] = False
    if c2.nextdistance == nodelength:
     c2.next.append(c1)
     c2.nextdirection.append(False)
    else:
     c2n=c2.next[0]
     if c2.nextdirection[0]:
      c2n.pre.pop()
      c2n.predirection.pop()
      c2n.predistance=nodelength
     else:
      c2n.next.pop()
      c2n.nextdirection.pop()
      c2n.nextdistance=nodelength
     c2.next[0]=c1
     c2.nextdirection[0]=False
    c1.nextdistance=temp
    c2.nextdistance=temp
    c1.nextsequence=readtemp.sequence[k_mer+cl1-1:temp+cl1-1]
    c2.nextsequence = getreverse(c1.nextsequence)
# pre True
#  if cp1+cl2 < nodelength:
  # Case 3: both flags are 0 -- the pre end of c1 is joined to the next
  # end of c2; both are compared through their ``reverse`` strand.
  if (cd1==0) and (cd2==0):
   temp=cp1+cl2 
   if temp >= readtemp.kl:
    continue
   temp = readtemp.kl-temp   
   otemp=k_mer-temp
   if (temp < c1.predistance) and (temp < c2.nextdistance) and ((otemp <=0) or c1.sequence[:otemp] == c2.sequence[-otemp:]):
    poltemp=cp1+k_mer
    noltemp=len(c2.sequence) - cp2
#    if (not kpgraph.indistance(c1.reverse[-poltemp:], readpool[i/2].sequence[:poltemp],poltemp/m_mer)) or (not kpgraph.indistance(c2.reverse[:noltemp],readpool[i/2].sequence[-noltemp:],noltemp/m_mer)):
    if (not check(c1.reverse[-poltemp:], readpool[i/2].sequence[:poltemp],poltemp/m_mer)) or (not check(c2.reverse[:noltemp],readpool[i/2].sequence[-noltemp:],noltemp/m_mer)):
     continue
    if c1.predistance == nodelength:
     c1.pre.append(c2)
     c1.predirection.append(True)
    else:
     c1p=c1.pre[0]
     if c1.predirection[0]:
#      if c1p.next[0] ==c1:
      c1p.next.pop()
      c1p.nextdirection.pop()
      c1p.nextdistance=nodelength
     else:
      c1p.pre.pop()
      c1p.predirection.pop()
      c1p.predistance=nodelength
     c1.pre[0] = c2
     c1.predirection[0] = True
    if c2.nextdistance == nodelength:
     c2.next.append(c1)
     c2.nextdirection.append(True)
    else:
     c2n=c2.next[0]
     if c2.nextdirection[0]:
      c2n.pre.pop()
      c2n.predirection.pop()
      c2n.predistance=nodelength
     else:
      c2n.next.pop()
      c2n.nextdirection.pop()
      c2n.nextdistance=nodelength
     c2.next[0]=c1
     c2.nextdirection[0]=True
    c1.predistance=temp
    c2.nextdistance=temp
    # The bridge is cut from the reverse complement of the read here.
    c2.nextsequence = getreverse(readtemp.sequence)[k_mer+cl2-1:temp+cl2-1]
    c1.presequence=c2.nextsequence[:]
# pre False
#  if cp1+cp2 +1< nodelength:
  # Case 4: flags (0, 1) -- the pre end of c1 is joined to the pre end of
  # c2; c1 is compared through ``reverse``, c2 through ``sequence``.
  if (cd1==0) and (cd2==1):
   temp=cp1+cp2 +1
   if temp >= readtemp.kl:
    continue
   temp = readtemp.kl-temp   
   otemp=k_mer-temp
   if (temp < c1.predistance) and (temp < c2.predistance) and ((otemp <=0) or c1.sequence[:otemp] == c2.reverse[-otemp:]):
    poltemp=cp1+k_mer
    noltemp=cp2+k_mer
#    if (not kpgraph.indistance(c1.reverse[-poltemp:], readpool[i/2].sequence[:poltemp],poltemp/m_mer)) or (not kpgraph.indistance(c2.sequence[:noltemp],readpool[i/2].sequence[-noltemp:],noltemp/m_mer)):
    if (not check(c1.reverse[-poltemp:], readpool[i/2].sequence[:poltemp],poltemp/m_mer)) or (not check(c2.sequence[:noltemp],readpool[i/2].sequence[-noltemp:],noltemp/m_mer)):
     continue
    if c1.predistance == nodelength:
     c1.pre.append(c2)
     c1.predirection.append(False)
    else:
     c1p=c1.pre[0]
     if c1.predirection[0]:
#      if c1p.next[0] ==c1:
      c1p.next.pop()
      c1p.nextdirection.pop()
      c1p.nextdistance=nodelength
     else:
      c1p.pre.pop()
      c1p.predirection.pop()
      c1p.predistance=nodelength
     c1.pre[0] = c2
     c1.predirection[0] = False
    if c2.predistance == nodelength:
     c2.pre.append(c1)
     c2.predirection.append(False)
    else:
     c2p=c2.pre[0]
     if c2.predirection[0]:
      c2p.next.pop()
      c2p.nextdirection.pop()
      c2p.nextdistance=nodelength
     else:
      c2p.pre.pop()
      c2p.predirection.pop()
      c2p.predistance=nodelength
     c2.pre[0]=c1
     c2.predirection[0]=False
    c1.predistance=temp
    c2.predistance=temp
    c2.presequence = readtemp.sequence[-temp-cp2:-k_mer-cp2]
    c1.presequence= getreverse(c2.presequence)
# fusecontig
 # Collapse every chain of links built above into one contig.
 contigpool = fusecontig(contigpool, nodelength)
 return contigpool

def estimatepairlength(contigpool,nodepool):
 """Return the median anchor span of pairs that lie on one contig.

 nodepool is read two entries at a time; each entry's ``contiginfo`` is
 unpacked as (contig, position, direction).  A pair contributes
 abs(position1 - position2) + 1 when both contigs belong to contigpool,
 both anchors are on the same contig and both carry the same direction.

 contigpool -- iterable of hashable contig objects.
 nodepool   -- sequence of objects exposing ``contiginfo``.

 Returns the upper median of the collected spans.
 Raises IndexError when no pair qualifies (the span list is empty).
 """
 pairlength=[]
 contigset= set(contigpool)
 for i in range(0,len(nodepool),2):
  (c1,cp1,cd1)=nodepool[i].contiginfo
  if c1 not in contigset:
   continue
  (c2,cp2,cd2)=nodepool[i+1].contiginfo
  if c2 not in contigset:
   continue
  if (c1 == c2) and (cd1 == cd2):
   pairlength.append(abs(cp1-cp2)+1)
 pairlength.sort()
 # Floor division keeps the index an int even under true division.
 return pairlength[len(pairlength)//2]

def estimateinsertsize(contigpool,nodepool,readpool):
 """Estimate the mate-pair insert size and normalise read orientation.

 nodepool is consumed four entries at a time.  Each entry is unpacked as
 a 3-tuple whose second and third items are used as (contig, position).
 Entries i and i+1 belong to readpool[i/2], entries i+2 and i+3 to
 readpool[i/2+1] (Python 2 integer division).  A group is used only when
 all four entries sit on the same contig of contigpool and the two
 positions of each read are exactly len(read)-k_mer apart.

 For every usable group the outer span of the two reads plus k_mer is
 recorded, and two counters vote on the orientation of the first read
 (reverse1) and of the second read (reverse2).

 Side effects:
   * prints statistics (via reportlist and directly);
   * if reverse1 > 0, swaps the two nodepool entries of every first read
     and replaces that read by getreverse(read); likewise for the second
     read if reverse2 > 0.  nodepool and readpool are modified in place.

 Returns the value returned by reportlist for the recorded sizes.
 NOTE(review): if no group qualifies the size list is empty and
 reportlist calls max() on it -- confirm callers guarantee data.
 """
 # Local list of samples; shadows the module-level insertsize here.
 insertsize=[]
 reverse1=0
 reverse2=0
 contigset= set(contigpool)
 for i in range(0,len(nodepool),4):
# for i in range(len(nodepool)-4,-4,-4):
  (temp,c1,cp1)=nodepool[i]
  (temp,c2,cp2)=nodepool[i+1]
  # First read must lie fully on one known contig.
  if (c1 not in contigset) or (c1 is not c2) or (abs(cp1-cp2)!= len(readpool[i/2])-k_mer):
   continue
  (temp,c3,cp3)=nodepool[i+2]
  (temp,c4,cp4)=nodepool[i+3]
  # Second read must lie fully on that same contig.
  if (c3 is not c1) or (c4 is not c3) or (abs(cp3-cp4)!= len(readpool[(i/2)+1])-k_mer):
   continue
  # Eight cases: orientation of read 1 (cp2>cp1), which read lies further
  # along the contig, and orientation of read 2 (cp4>cp3).  Each case
  # votes on both counters and measures a distance between one end of
  # each read.
  if cp2>cp1:
   if max(cp3,cp4) > max(cp1,cp2):
    if cp4>cp3:
     reverse1-=1
     reverse2-=1
     length= cp4 -cp1
    else:
     reverse1-=1
     reverse2+=1
     length= cp3 -cp1
   else:
    if cp4>cp3:
     reverse1+=1
     reverse2+=1
     length= cp2 -cp3
    else:
     reverse1+=1
     reverse2-=1
     length= cp2 -cp4
  else:
   if max(cp3,cp4) > max(cp1,cp2):
    if cp4>cp3:
     reverse1+=1
     reverse2-=1
     length= cp4 -cp2
    else:
     reverse1+=1
     reverse2+=1
     length= cp3 -cp2
   else:
    if cp4>cp3:
     reverse1-=1
     reverse2+=1
     length= cp1 -cp3
    else:
     reverse1-=1
     reverse2-=1
     length= cp1 -cp4
#  insertsize2.append(minlength)
#  insertsize3.append(maxlength)
#  insertsize4.append((abs(cp1-cp4) +abs(cp2-cp3) -abs(cp1-cp2) -abs(cp3-cp4))/2)
  # Positions are k-mer start positions, so k_mer is added to the span.
  insertsize.append(length+k_mer)
# writelist('insertsize.rtf',insertsize)
# writelist('insertsize2.rtf',insertsize2)
# writelist('insertsize3.rtf',insertsize3)
# writelist('insertsize4.rtf',insertsize4)
 print 'estimate insertsize:'
 # From here on insertsize is the single value returned by reportlist.
 insertsize =reportlist(insertsize)
# print 'min insertsize:'
# reportlist(insertsize2)
# print 'max insertsize:'
# reportlist(insertsize3)
# print 'ideal insertsize:'
# reportlist(insertsize4)
 print 'Front end reverse count:',reverse1
 print 'Rear end reverse count:',reverse2
 # A positive vote flips every first read: swap its two node entries and
 # replace the read by its getreverse() form.
 if reverse1 > 0:
  for i in range(0,len(nodepool),4):
   temp=nodepool[i]
   nodepool[i]=nodepool[i+1]
   nodepool[i+1]=temp
   readpool[i/2]=getreverse(readpool[i/2])
 # Same for every second read.
 if reverse2 > 0:
  for i in range(0,len(nodepool),4):
   ipt=i+2
   temp=nodepool[ipt]
   nodepool[ipt]=nodepool[i+3]
   nodepool[i+3]=temp
   readpool[ipt/2]=getreverse(readpool[ipt/2])
 print 'Estimated insertsize=',str(insertsize)
 return insertsize

def medium(a):
 """Return the (upper) median of list a.

 The list is sorted IN PLACE; reportlist relies on that side effect
 before it calls findmaxcount.  For an even number of items the upper of
 the two middle items is returned.  Raises IndexError on an empty list.
 """
 a.sort()
 # Floor division keeps the index an int even under true division.
 return a[len(a)//2]

def inoverlap(s1,s2,overlap):
 """Find a suffix of s1 that equals a prefix of s2, near a given length.

 Lengths from overlap + overlap//20 down to overlap - overlap//20 are
 tried, longest first (a tolerance of 5 percent, rounded down).

 s1, s2  -- sequences supporting slicing and ==.
 overlap -- expected overlap length (int).

 Returns the first matching length, or -1 when none matches.
 Note: slices are not bounds-checked, so a length larger than both
 sequences "matches" when the sequences are equal as a whole.
 """
 # Floor division: range() needs ints, also under true division.
 of=overlap//20
 for o in range(overlap+of,overlap-of-1,-1):
  if s1[-o:]==s2[:o]:
   return o
 return -1

def findmaxcount(l):
 """Return [value, count] of the longest run of equal adjacent items in l.

 Only neighbouring equal items are grouped, so callers pass a sorted list
 to obtain the most frequent value.  Runs are ranked through sorttuple by
 (count, value); the highest-ranked run wins.  Raises IndexError when l
 is empty.
 """
 values=[]
 counts=[]
 total=len(l)
 pos=0
 while pos<total:
  start=pos
  item=l[pos]
  pos+=1
  # Swallow the rest of the run of items equal to item.
  while (pos<total) and (item==l[pos]):
   pos+=1
  values.append(item)
  counts.append(pos-start)
 # Sort runs by (count, value); the last entry is the winner.
 [counts,values]=sorttuple([counts,values])
 return [values[len(values)-1],counts[len(counts)-1]]

def sorttuple(l):
 """Sort several parallel lists together, in place.

 l is a list of equally long lists.  Row k is the tuple
 (l[0][k], l[1][k], ...); the rows are sorted as tuples, so l[0] is the
 primary key, l[1] breaks ties, and so on.  The sorted values are written
 back into the original inner lists, and l itself is returned.  The number
 of rows is taken from l[0].  An empty l is returned unchanged.
 """
 if len(l)==0:
  return l
 width=len(l[0])
 rows=[tuple([column[k] for column in l]) for k in range(width)]
 rows.sort()
 # Scatter the sorted rows back into the caller's column lists.
 for c,column in enumerate(l):
  for k,row in enumerate(rows):
   column[k]=row[c]
 return l

def removetransitive(contigtemp):
 """Remove links of contigtemp that a nearer neighbour already provides.

 Each side (next, then pre) holding more than one link is sorted by
 distance through sorttuple and scanned from the farthest link to the
 nearest.  A far link is dropped when findpair reports (result >= 0) that
 some nearer neighbour itself lists the far contig among its links:
 on the nearer neighbour's onward side with the far link's own direction
 flag when the nearer link's direction is true, otherwise on its opposite
 side with the inverted flag.  Dropping a link removes entry i from
 contigtemp's three parallel lists and the matching back-entry (located
 with findpair) from the far contig.

 Returns contigtemp, modified in place.
 """
 if len(contigtemp.next) > 1:
  ordered=sorttuple([contigtemp.nextdistance,contigtemp.next,contigtemp.nextdirection])
  [contigtemp.nextdistance,contigtemp.next,contigtemp.nextdirection]=ordered
  for far in range(len(contigtemp.next)-1,0,-1):
   forward=bool(contigtemp.nextdirection[far])
   for near in range(far):
    via=contigtemp.next[near]
    # Does the nearer neighbour already lead on to the far contig?
    if contigtemp.nextdirection[near]:
     p=findpair(via.next,via.nextdirection,contigtemp.next[far],forward)
    else:
     p=findpair(via.pre,via.predirection,contigtemp.next[far],not forward)
    if p >=0:
     target=contigtemp.next[far]
     # The back-link lives on target's pre side for a forward link and
     # on its next side otherwise.
     if forward:
      f=findpair(target.pre,target.predirection,contigtemp,True)
      target.pre.pop(f)
      target.predirection.pop(f)
      target.predistance.pop(f)
     else:
      f=findpair(target.next,target.nextdirection,contigtemp,False)
      target.next.pop(f)
      target.nextdirection.pop(f)
      target.nextdistance.pop(f)
     contigtemp.next.pop(far)
     contigtemp.nextdirection.pop(far)
     contigtemp.nextdistance.pop(far)
     break
 if len(contigtemp.pre) > 1:
  ordered=sorttuple([contigtemp.predistance,contigtemp.pre,contigtemp.predirection])
  [contigtemp.predistance,contigtemp.pre,contigtemp.predirection]=ordered
  for far in range(len(contigtemp.pre)-1,0,-1):
   forward=bool(contigtemp.predirection[far])
   for near in range(far):
    via=contigtemp.pre[near]
    if contigtemp.predirection[near]:
     p=findpair(via.pre,via.predirection,contigtemp.pre[far],forward)
    else:
     p=findpair(via.next,via.nextdirection,contigtemp.pre[far],not forward)
    if p >=0:
     target=contigtemp.pre[far]
     if forward:
      f=findpair(target.next,target.nextdirection,contigtemp,True)
      target.next.pop(f)
      target.nextdirection.pop(f)
      target.nextdistance.pop(f)
     else:
      f=findpair(target.pre,target.predirection,contigtemp,False)
      target.pre.pop(f)
      target.predirection.pop(f)
      target.predistance.pop(f)
     contigtemp.pre.pop(far)
     contigtemp.predirection.pop(far)
     contigtemp.predistance.pop(far)
     break
 return contigtemp

def removetransitiveconflictcond(contigtemp):
 """Remove duplicate links and far links that fit behind a nearer one.

 Each side (next, then pre) holding more than one link is sorted by
 distance through sorttuple and scanned from the farthest link to the
 nearest.  Far link i is dropped when, for some nearer link j, either
   * both links point at the same contig, or
   * distance[j] + len(nodelist of j) <= distance[i]  and
     len(nodelist of i) + distance[i] - distance[j] <= insertsize
     (insertsize is the module-level global).
 Dropping a link removes entry i from contigtemp's three parallel lists
 and the matching back-entry (located with findpair) from the far contig.

 Returns contigtemp, modified in place.
 """
 if len(contigtemp.next) > 1:
  ordered=sorttuple([contigtemp.nextdistance,contigtemp.next,contigtemp.nextdirection])
  [contigtemp.nextdistance,contigtemp.next,contigtemp.nextdirection]=ordered
  for far in range(len(contigtemp.next)-1,0,-1):
   forward=bool(contigtemp.nextdirection[far])
   for near in range(far):
    redundant=contigtemp.next[near]==contigtemp.next[far]
    if not redundant:
     # The nearer contig must end before the far one starts ...
     if (contigtemp.nextdistance[near]+len(contigtemp.next[near].nodelist))<=contigtemp.nextdistance[far]:
      # ... and the far contig must still end within one insert size.
      redundant=(len(contigtemp.next[far].nodelist) +contigtemp.nextdistance[far] -contigtemp.nextdistance[near])<=insertsize
    if not redundant:
     continue
    target=contigtemp.next[far]
    if forward:
     f=findpair(target.pre,target.predirection,contigtemp,True)
     target.pre.pop(f)
     target.predirection.pop(f)
     target.predistance.pop(f)
    else:
     f=findpair(target.next,target.nextdirection,contigtemp,False)
     target.next.pop(f)
     target.nextdirection.pop(f)
     target.nextdistance.pop(f)
    contigtemp.next.pop(far)
    contigtemp.nextdirection.pop(far)
    contigtemp.nextdistance.pop(far)
    break
 if len(contigtemp.pre) > 1:
  ordered=sorttuple([contigtemp.predistance,contigtemp.pre,contigtemp.predirection])
  [contigtemp.predistance,contigtemp.pre,contigtemp.predirection]=ordered
  for far in range(len(contigtemp.pre)-1,0,-1):
   forward=bool(contigtemp.predirection[far])
   for near in range(far):
    redundant=contigtemp.pre[near]==contigtemp.pre[far]
    if not redundant:
     if (contigtemp.predistance[near]+len(contigtemp.pre[near].nodelist))<=contigtemp.predistance[far]:
      redundant=(len(contigtemp.pre[far].nodelist) +contigtemp.predistance[far] -contigtemp.predistance[near])<=insertsize
    if not redundant:
     continue
    target=contigtemp.pre[far]
    if forward:
     f=findpair(target.next,target.nextdirection,contigtemp,True)
     target.next.pop(f)
     target.nextdirection.pop(f)
     target.nextdistance.pop(f)
    else:
     f=findpair(target.pre,target.predirection,contigtemp,False)
     target.pre.pop(f)
     target.predirection.pop(f)
     target.predistance.pop(f)
    contigtemp.pre.pop(far)
    contigtemp.predirection.pop(far)
    contigtemp.predistance.pop(far)
    break
 return contigtemp

def removetransitiveconflict(contigtemp):
 """Remove duplicate links and far links lying beyond a nearer contig.

 Each side (next, then pre) holding more than one link is sorted by
 distance through sorttuple and scanned from the farthest link to the
 nearest.  Far link i is dropped when, for some nearer link j, either
 both links point at the same contig or
 distance[j] + len(nodelist of j) <= distance[i].
 Dropping a link removes entry i from contigtemp's three parallel lists
 and the matching back-entry (located with findpair) from the far contig.

 Returns contigtemp, modified in place.
 """
 if len(contigtemp.next) > 1:
  ordered=sorttuple([contigtemp.nextdistance,contigtemp.next,contigtemp.nextdirection])
  [contigtemp.nextdistance,contigtemp.next,contigtemp.nextdirection]=ordered
  for far in range(len(contigtemp.next)-1,0,-1):
   forward=bool(contigtemp.nextdirection[far])
   for near in range(far):
    redundant=contigtemp.next[near]==contigtemp.next[far]
    if not redundant:
     # The nearer contig ends at or before the start of the far one.
     redundant=(contigtemp.nextdistance[near]+len(contigtemp.next[near].nodelist))<=contigtemp.nextdistance[far]
    if not redundant:
     continue
    target=contigtemp.next[far]
    if forward:
     f=findpair(target.pre,target.predirection,contigtemp,True)
     target.pre.pop(f)
     target.predirection.pop(f)
     target.predistance.pop(f)
    else:
     f=findpair(target.next,target.nextdirection,contigtemp,False)
     target.next.pop(f)
     target.nextdirection.pop(f)
     target.nextdistance.pop(f)
    contigtemp.next.pop(far)
    contigtemp.nextdirection.pop(far)
    contigtemp.nextdistance.pop(far)
    break
 if len(contigtemp.pre) > 1:
  ordered=sorttuple([contigtemp.predistance,contigtemp.pre,contigtemp.predirection])
  [contigtemp.predistance,contigtemp.pre,contigtemp.predirection]=ordered
  for far in range(len(contigtemp.pre)-1,0,-1):
   forward=bool(contigtemp.predirection[far])
   for near in range(far):
    redundant=contigtemp.pre[near]==contigtemp.pre[far]
    if not redundant:
     redundant=(contigtemp.predistance[near]+len(contigtemp.pre[near].nodelist))<=contigtemp.predistance[far]
    if not redundant:
     continue
    target=contigtemp.pre[far]
    if forward:
     f=findpair(target.next,target.nextdirection,contigtemp,True)
     target.next.pop(f)
     target.nextdirection.pop(f)
     target.nextdistance.pop(f)
    else:
     f=findpair(target.pre,target.predirection,contigtemp,False)
     target.pre.pop(f)
     target.predirection.pop(f)
     target.predistance.pop(f)
    contigtemp.pre.pop(far)
    contigtemp.predirection.pop(far)
    contigtemp.predistance.pop(far)
    break
 return contigtemp

def removetransitiveandconflict(contigtemp):
 """Remove transitively implied links and duplicate links of contigtemp.

 Each side (next, then pre) holding more than one link is sorted by
 distance through sorttuple and scanned from the farthest link to the
 nearest.  Far link i is dropped when, for some nearer link j, either
   * findpair reports (result >= 0) that neighbour j itself lists the far
     contig among its links -- on its onward side with the far link's own
     direction flag when link j's direction is true, otherwise on its
     opposite side with the inverted flag -- or
   * links i and j point at the same contig.
 Dropping a link removes entry i from contigtemp's three parallel lists
 and the matching back-entry (located with findpair) from the far contig.

 Returns contigtemp, modified in place.
 """
 if len(contigtemp.next) > 1:
  ordered=sorttuple([contigtemp.nextdistance,contigtemp.next,contigtemp.nextdirection])
  [contigtemp.nextdistance,contigtemp.next,contigtemp.nextdirection]=ordered
  for far in range(len(contigtemp.next)-1,0,-1):
   forward=bool(contigtemp.nextdirection[far])
   for near in range(far):
    via=contigtemp.next[near]
    if contigtemp.nextdirection[near]:
     p=findpair(via.next,via.nextdirection,contigtemp.next[far],forward)
    else:
     p=findpair(via.pre,via.predirection,contigtemp.next[far],not forward)
    # Transitive hit first; the duplicate test is only evaluated when
    # the transitive lookup failed.
    if p >=0 or contigtemp.next[near]==contigtemp.next[far]:
     target=contigtemp.next[far]
     if forward:
      f=findpair(target.pre,target.predirection,contigtemp,True)
      target.pre.pop(f)
      target.predirection.pop(f)
      target.predistance.pop(f)
     else:
      f=findpair(target.next,target.nextdirection,contigtemp,False)
      target.next.pop(f)
      target.nextdirection.pop(f)
      target.nextdistance.pop(f)
     contigtemp.next.pop(far)
     contigtemp.nextdirection.pop(far)
     contigtemp.nextdistance.pop(far)
     break
 if len(contigtemp.pre) > 1:
  ordered=sorttuple([contigtemp.predistance,contigtemp.pre,contigtemp.predirection])
  [contigtemp.predistance,contigtemp.pre,contigtemp.predirection]=ordered
  for far in range(len(contigtemp.pre)-1,0,-1):
   forward=bool(contigtemp.predirection[far])
   for near in range(far):
    via=contigtemp.pre[near]
    if contigtemp.predirection[near]:
     p=findpair(via.pre,via.predirection,contigtemp.pre[far],forward)
    else:
     p=findpair(via.next,via.nextdirection,contigtemp.pre[far],not forward)
    if p >=0 or contigtemp.pre[near]==contigtemp.pre[far]:
     target=contigtemp.pre[far]
     if forward:
      f=findpair(target.next,target.nextdirection,contigtemp,True)
      target.next.pop(f)
      target.nextdirection.pop(f)
      target.nextdistance.pop(f)
     else:
      f=findpair(target.pre,target.predirection,contigtemp,False)
      target.pre.pop(f)
      target.predirection.pop(f)
      target.predistance.pop(f)
     contigtemp.pre.pop(far)
     contigtemp.predirection.pop(far)
     contigtemp.predistance.pop(far)
     break
 return contigtemp

def reportlist(a):
 print 'total:',len(a)
 print 'max:', max(a)
 print 'min:', min(a)
 aa =sum(a)/len(a)
 print 'average:',aa
 print 'average count:',a.count(aa)
 ma =medium(a)
 print 'medium:',ma
 print 'medium count:',a.count(ma)
 [fa,ca]=findmaxcount(a)
 print 'frequent:',fa
 print 'frequent count:',ca
 at=int(ma*1.2)
 ab=int(ma*0.8)
 b= [c for c in a if (c>=ab and c <= at) or ( c>= at and c <= ab) ] 
 bb=medium(b)
# bb =sum(b)/len(b)
 print 'insert size:',bb
 print 'count:',b.count(bb)
 return bb

def matepair(contigpool,nodepool,readpool,insertsize,insertstd):
 gv=globals()
 print 's_number',s_number
 print 'remove transitive method:', gv['removetransitivemethod']
 if gv['removetransitivemethod']=='removetransitive':
  gv['removetransitivemethod']=removetransitive
 if gv['removetransitivemethod']=='removetransitiveandconflict':
  gv['removetransitivemethod']=removetransitiveandconflict
 if gv['removetransitivemethod']=='removetransitiveconflict':
  gv['removetransitivemethod']=removetransitiveconflict
 if gv['removetransitivemethod']=='removetransitiveconflictcond':
  gv['removetransitivemethod']=removetransitiveconflictcond
 round=0
 contigpool = ssetnodeinfo( contigpool)
#  gv['insertsize'] =estimateinsertsize(contigpool,nodepool,readpool)
 pl=len(contigpool)
 contigpool=greedymatepair(contigpool,nodepool,readpool,insertsize,insertstd)
 l=len(contigpool)
 while l<pl:
  round+=1
  pl=l
  contigpool = ssetnodeinfo( contigpool)
  contigpool=greedymatepair(contigpool,nodepool,readpool,insertsize,insertstd)
  l=len(contigpool)
 print 'round:',round
 return contigpool

# One greedy scaffolding pass: collect mate-pair links (linkmatepair), reduce
# each link's samples to a single distance (setinsertlink), run the configured
# `removetransitivemethod` on every contig, then merge what is left with
# insertcontig().  Returns the resulting contig list.
def greedymatepair(contigpool,nodepool,readpool,insertsize,insertstd):
 linked=linkmatepair(contigpool,nodepool,readpool,insertsize)
 linked=setinsertlink(linked,insertsize,insertstd)
 pruned=[]
 for contig in linked:
  pruned.append(removetransitivemethod(contig))
 return insertcontig(pruned)

def matepair_p(contigpool,nodepool,readpool,insertsize,insertstd):
 gv=globals()
 print 's_number',s_number
 round=0
 contigpool = ssetnodeinfo( contigpool)
 pl=len(contigpool)
 contigpool=prograsivematepair(contigpool,nodepool,readpool,insertsize,insertstd)
 l=len(contigpool)
 while l<pl:
  round+=1
  pl=l
  contigpool = ssetnodeinfo( contigpool)
  contigpool=prograsivematepair(contigpool,nodepool,readpool,insertsize,insertstd)
  l=len(contigpool)
 print 'round:',round
 return contigpool

# One progressive scaffolding pass: collect mate-pair links (linkmatepair),
# keep a conflict-free subset of them (prograsivelink) and merge the linked
# contigs with insertcontig().  Returns the resulting contig list.
def prograsivematepair(contigpool,nodepool,readpool,insertsize,insertstd):
 linked=linkmatepair(contigpool,nodepool,readpool,insertsize)
 chained=prograsivelink(linked,insertsize,insertstd)
 return insertcontig(chained)

# Rebuild the mate-pair links between contigs from scratch.
#
# Every contig in contigpool is marked fresh and gets empty pre/next link
# lists.  nodepool is then consumed four entries at a time; each entry is
# unpacked as a 3-tuple whose second and third elements are used as a contig
# and a position on it.  Entries i and i+1 are checked against
# readpool[i/2], entries i+2 and i+3 against readpool[(i/2)+1]
# (presumably the first and last k-mer of the two reads of a pair -- TODO
# confirm against the code that fills nodepool).
#
# For every accepted group one distance sample is appended to the link
# between the contig of the first read (c2, same object as c1) and the contig
# of the second read (c3, same object as c4).  The link is recorded on both
# contigs; each link's *distance entry is a list of samples.
# Returns contigpool (modified in place).
def linkmatepair(contigpool,nodepool,readpool,insertsize):
 contigset= set(contigpool)
 # Drop all links left over from a previous pass.
 for temp in contigpool:
  temp.fresh=True
  temp.pre=[]
  temp.predirection=[]
  temp.next=[]
  temp.nextdirection=[]
  temp.nextdistance=[]
  temp.predistance=[]
 for i in range(0,len(nodepool),4):
  (temp12,c1,cp1)=nodepool[i]
  (temp12,c2,cp2)=nodepool[i+1]
  # The first read must lie entirely on one contig of the pool: both entries
  # on the same contig and exactly len(read)-k_mer positions apart.
  if (c1 not in contigset) or (c1 is not c2) or (abs(cp2-cp1)!= len(readpool[i/2])-k_mer):
   continue
  # Sign of the position difference encodes the read's orientation on c1.
  temp12=cp2-cp1
  (temp34,c3,cp3)=nodepool[i+2]
  (temp34,c4,cp4)=nodepool[i+3]
  # Same requirement for the second read.
  if (c3 not in contigset) or (c4 is not c3) or (abs(cp4-cp3)!= len(readpool[(i/2)+1])-k_mer):
   continue
  temp34=cp4-cp3
  # cl1/cl4: number of k-mer positions from cp1/cp4 to the end of the contig.
  cl1=len(c1.sequence) +1 - k_mer-cp1
  cl4=len(c4.sequence) +1 -k_mer-cp4
# next true
#  if cl1<= insertsize:
#   if cp4 < insertsize:
#  if cl1+cp4 <= insertsize:
  # Both reads forward: c3 follows c2 in the same orientation.  A pair on a
  # single contig is only accepted when cp1>cp3.
  if (temp12>0) and (temp34>0) and ((c1 is not c3) or (cp1>cp3)):
   temp=cl1+cp4
#   if temp > insertsize:
#    continue
   # Distance sample: insert size minus the part already covered on both
   # contigs, minus k_mer.
   temp =1+ insertsize-temp -k_mer  
   d=findpair(c2.next,c2.nextdirection,c3,True)
   if d== -1:
    # First sample for this link: create it on both ends.
    c2.next.append(c3)
    c2.nextdirection.append(True)
    c2.nextdistance.append([temp])
    c3.pre.append(c2)
    c3.predirection.append(True)
    c3.predistance.append([temp])
   else:
    # Existing link: add the sample on both ends.
    c2.nextdistance[d].append(temp)
    d=findpair(c3.pre,c3.predirection,c2,True)
    c3.predistance[d].append(temp)
 # next False
#  if cl1+cl4 -1 <= insertsize:
  # First read forward, second not: the two contigs are joined through their
  # `next` ends with opposite orientation (direction flag False).
  if (temp12>0) and (temp34<=0) and (c3 is not c1):
   temp=cl1+cl4-1
#   if temp > insertsize:
#    continue
   temp =1+ insertsize-temp -k_mer  
   d=findpair(c2.next,c2.nextdirection,c3,False)
   if d== -1:
    c2.next.append(c3)
    c2.nextdirection.append(False)
    c2.nextdistance.append([temp])
    c3.next.append(c2)
    c3.nextdirection.append(False)
    c3.nextdistance.append([temp])
   else:
    c2.nextdistance[d].append(temp)
    d=findpair(c3.next,c3.nextdirection,c2,False)
    c3.nextdistance[d].append(temp)
# pre True
#  if cp1+cl4 <= insertsize:
  # Neither read forward: c3 precedes c2 in the same orientation.  A pair on a
  # single contig is only accepted when cp3>cp1.
  if (temp12<=0) and (temp34<=0) and ((c1 is not c3) or (cp3>cp1)):
   temp=cp1+cl4
#   if temp > insertsize:
#    continue
   temp =1+ insertsize-temp -k_mer  
   d=findpair(c2.pre,c2.predirection,c3,True)
   if d== -1:
    c2.pre.append(c3)
    c2.predirection.append(True)
    c2.predistance.append([temp])
    c3.next.append(c2)
    c3.nextdirection.append(True)
    c3.nextdistance.append([temp])
   else:
    c2.predistance[d].append(temp)
    d=findpair(c3.next,c3.nextdirection,c2,True)
    c3.nextdistance[d].append(temp)
# pre False
#  if cp1+cp4 +1<= insertsize:
  # First read not forward, second forward: the two contigs are joined
  # through their `pre` ends with opposite orientation (direction flag False).
  if (temp12<=0) and (temp34>0) and (c3 is not c1):
   temp=cp1+cp4+1
#   if temp > insertsize:
#    continue
   temp =1+ insertsize-temp -k_mer  
   d=findpair(c2.pre,c2.predirection,c3,False)
   if d== -1:
    c2.pre.append(c3)
    c2.predirection.append(False)
    c2.predistance.append([temp])
    c3.pre.append(c2)
    c3.predirection.append(False)
    c3.predistance.append([temp])
   else:
    c2.predistance[d].append(temp)
    d=findpair(c3.pre,c3.predirection,c2,False)
    c3.predistance[d].append(temp)
 return contigpool

# Reduce the raw mate-pair links to a conflict-free set in which every contig
# end carries at most one link.
#
# Expects each *distance entry to be a list of distance samples (as built by
# linkmatepair).  Steps:
#  1. number the contigs (contigtemp.id = index in contigpool) and drop links
#     with fewer than s_number samples or a median distance <= -insertstd;
#     surviving entries become [sample count, distance];
#  2. collect every surviving link once as a tuple
#     (count, distance, from, to, direction) and sort them in descending
#     order, i.e. best supported first;
#  3. clear all links and re-add them greedily, accepting a link only while
#     both contig ends it uses are still free.
# After step 3 each *distance entry is a plain distance value.
# The `insertsize` parameter is not used in this function.
# Returns contigpool (modified in place).
def prograsivelink(contigpool,insertsize,insertstd):
 print 'trace:'
 stacontig(contigpool)
 # Median distances at or below `cond` are re-checked with an overlap test.
 cond=k_mer-4
# condition=-(insertsize*0.2)
 # Lowest median distance a link may have and still be kept.
 condition=-insertstd
 kpp=k_mer+1
 for j in range(len(contigpool)):
  contigtemp=contigpool[j]
  contigtemp.id=j
  # Walk backwards so that pop(i) does not disturb the indices still to visit.
  for i in range(len(contigtemp.predistance)-1,-1,-1):
   temp=len(contigtemp.predistance[i])
   temptemp=medium(contigtemp.predistance[i])
   if temp < s_number or temptemp <= condition:
    contigtemp.pre.pop(i)
    contigtemp.predirection.pop(i)
    contigtemp.predistance.pop(i)
   else:
    if temptemp<=cond:
     temptemp=overlappre(contigtemp,contigtemp.pre[i],contigtemp.predirection[i],cond)
    # No overlap found (0), or a distance in (cond, k_mer]: use k_mer+1.
    if (temptemp>cond and temptemp < kpp) or temptemp ==0:
     temptemp = kpp
    contigtemp.predistance[i]=[temp,temptemp]
  for i in range(len(contigtemp.nextdistance)-1,-1,-1):
   temp=len(contigtemp.nextdistance[i])
   temptemp=medium(contigtemp.nextdistance[i])
   if temp < s_number or temptemp <= condition:
    contigtemp.next.pop(i)
    contigtemp.nextdirection.pop(i)
    contigtemp.nextdistance.pop(i)
   else:
    if temptemp<=cond:
     temptemp=overlapnext(contigtemp,contigtemp.next[i],contigtemp.nextdirection[i],cond)
    if (temptemp>cond and temptemp < kpp) or temptemp ==0:
     temptemp = kpp
    contigtemp.nextdistance[i]=[temp,temptemp]
 print 'trace:'
 stacontig(contigpool)
 if len(contigpool):
  totallink=sum([len(contigtemp.pre) + len(contigtemp.next) for contigtemp in contigpool ])
 else:
  totallink=0
 print 'total link:', totallink
 # From here on totallink is the list of candidate links, each listed once.
 totallink=[]
 for contigtemp in contigpool:
  for i in range(len(contigtemp.next)):
   if contigtemp.nextdirection[i] == True:
    # Same-orientation links are taken from the `next` side only.
    totallink.append((contigtemp.nextdistance[i][0],contigtemp.nextdistance[i][1],contigtemp,contigtemp.next[i],contigtemp.nextdirection[i]))
   else:
    # next/next links appear on both contigs: keep the copy stored on the
    # contig with the smaller id.
    if contigtemp.id < contigtemp.next[i].id:
     totallink.append((contigtemp.nextdistance[i][0],contigtemp.nextdistance[i][1],contigtemp,contigtemp.next[i],contigtemp.nextdirection[i]))
  for i in range(len(contigtemp.pre)):
   if contigtemp.predirection[i] == False:
    # pre/pre links: stored with the larger-id contig first, which is how the
    # re-linking loop below tells them apart from next/next links.
    if contigtemp.id < contigtemp.pre[i].id:
     totallink.append((contigtemp.predistance[i][0],contigtemp.predistance[i][1],contigtemp.pre[i],contigtemp,contigtemp.predirection[i]))
 # Descending tuple order: highest sample count first.
 totallink.sort(reverse=True)
 print 'half link:', len(totallink)
 for contigtemp in contigpool:
  contigtemp.next=[]
  contigtemp.nextdirection=[]
  contigtemp.nextdistance=[]
  contigtemp.pre=[]
  contigtemp.predirection=[]
  contigtemp.predistance=[]
 # Number of links accepted by the greedy pass.
 linklink=0
 for temp in totallink:
  contigtemp2=temp[2]
  contigtemp3=temp[3]
  if temp[4] == True:
   # contigtemp2 -> contigtemp3, same orientation.
   if (len(contigtemp2.next)==0) and (len(contigtemp3.pre)==0):
    linklink+=1
    contigtemp2.next.append(contigtemp3)
    contigtemp2.nextdirection.append(True)
    contigtemp2.nextdistance.append(temp[1])
    contigtemp3.pre.append(contigtemp2)
    contigtemp3.predirection.append(True)
    contigtemp3.predistance.append(temp[1])
  elif contigtemp2.id < contigtemp3.id:
   # next/next link (opposite orientation).
   if (len(contigtemp2.next)==0) and (len(contigtemp3.next)==0):
    linklink+=1
    contigtemp2.next.append(contigtemp3)
    contigtemp2.nextdirection.append(False)
    contigtemp2.nextdistance.append(temp[1])
    contigtemp3.next.append(contigtemp2)
    contigtemp3.nextdirection.append(False)
    contigtemp3.nextdistance.append(temp[1])
  else:
   # pre/pre link (opposite orientation).
   if (len(contigtemp2.pre)==0) and (len(contigtemp3.pre)==0):
    linklink+=1
    contigtemp2.pre.append(contigtemp3)
    contigtemp2.predirection.append(False)
    contigtemp2.predistance.append(temp[1])
    contigtemp3.pre.append(contigtemp2)
    contigtemp3.predirection.append(False)
    contigtemp3.predistance.append(temp[1])
 print 'use',linklink,'links'
 print 'trace:'
 stacontig(contigpool)
 return contigpool

def setprograsivelink(contigpool):
 for contigtemp in contigpool:
  temp=0
  for i in range(len(contigtemp.predistance)-1,-1,-1):
   temptemp=len(contigtemp.predistance[i])
   if temptemp < s_number:
    contigtemp.pre.pop(i)
    contigtemp.predirection.pop(i)
    contigtemp.predistance.pop(i)
   else:
    temp+= temptemp
  for i in range(len(contigtemp.nextdistance)-1,-1,-1):
   temptemp=len(contigtemp.nextdistance[i])
   if temptemp < s_number:
    contigtemp.next.pop(i)
    contigtemp.nextdirection.pop(i)
    contigtemp.nextdistance.pop(i)
   else:
    temp+= temptemp
  contigtemp.id=temp
 def cmp(a,b):
  if a.id > b.id:
   return -1
  return 1
 contigpool.sort(cmp)
 print 'trace:'
 stacontig(contigpool)
 showcontigstructure(contigpool)
 for contigtemp in contigpool:
  temp=len(contigtemp.predistance)
  if temp > 1:
   temptemp=0
   for i in range(1,temp):
    if len(contigtemp.predistance[i]) > len(contigtemp.predistance[temptemp]):
     temptemp=i
   for i in range(temp-1,-1,-1):
    if i != temptemp:
     cp=contigtemp.pre[i]
     if contigtemp.predirection[i]:
      p=findpair(cp.next,cp.nextdirection,contigtemp,True)
      if p >=0:
       cp.next.pop(p)
       cp.nextdirection.pop(p)
       cp.nextdistance.pop(p)
     else:
      p=findpair(cp.pre,cp.predirection,contigtemp,False)
      if p >=0:
       cp.pre.pop(p)
       cp.predirection.pop(p)
       cp.predistance.pop(p)
     contigtemp.pre.pop(i)
     contigtemp.predirection.pop(i)
     contigtemp.predistance.pop(i)
  temp=len(contigtemp.nextdistance)
  if temp > 1:
   temptemp=0
   for i in range(1,temp):
    if len(contigtemp.nextdistance[i]) > len(contigtemp.nextdistance[temptemp]):
     temptemp=i
   for i in range(temp-1,-1,-1):
    if i != temptemp:
     cp=contigtemp.next[i]
     if contigtemp.nextdirection[i]:
      p=findpair(cp.pre,cp.predirection,contigtemp,True)
      if p >=0:
       cp.pre.pop(p)
       cp.predirection.pop(p)
       cp.predistance.pop(p)
     else:
      p=findpair(cp.next,cp.nextdirection,contigtemp,False)
      if p >=0:
       cp.next.pop(p)
       cp.nextdirection.pop(p)
       cp.nextdistance.pop(p)
     contigtemp.next.pop(i)
     contigtemp.nextdirection.pop(i)
     contigtemp.nextdistance.pop(i)
 return contigpool

def greedymatepair_s(contigpool,nodepool,readpool):
 contigset= set(contigpool)
 adjust=[]
 for temp in contigpool:
  temp.fresh=True
  temp.pre=[]
  temp.predirection=[]
  temp.next=[]
  temp.nextdirection=[]
  temp.nextdistance=[]
  temp.predistance=[]
# contigpool=linkscaffold(contigpool)
 for i in range(0,len(nodepool),4):
  (temp12,c1,cp1)=nodepool[i]
  (temp12,c2,cp2)=nodepool[i+1]
  if (c1 not in contigset) or (c1 != c2) or (abs(cp2-cp1)!= len(readpool[i/2])-k_mer):
   continue
  temp12=cp2-cp1
  (temp34,c3,cp3)=nodepool[i+2]
  (temp34,c4,cp4)=nodepool[i+3]
  if (c3 not in contigset) or (c3 == c1) or (c4 != c3) or (abs(cp4-cp3)!= len(readpool[(i/2)+1])-k_mer):
   continue
  temp34=cp4-cp3
  cl2=len(c2.sequence) +1 - k_mer-cp2
  cl3=len(c3.sequence) +1 -k_mer-cp3
# next true
#  if cl2<= insertsize:
#   if cp3 < insertsize:
#  if cl2+cp3 <= insertsize:
  if (temp12>0) and (temp34>0):
   temp =1+ insertsize- (cl2+cp3)
   d=findpair(c2.next,c2.nextdirection,c3,True)
   if d== -1:
    c2.next.append(c3)
    c2.nextdirection.append(True)
    c2.nextdistance.append([temp])
    c3.pre.append(c2)
    c3.predirection.append(True)
    c3.predistance.append([temp])
   else:
    if isinstance(c2.nextdistance[d],list):
     c2.nextdistance[d].append(temp)
    else:
     c2.nextdistance[d]+=1
     adjust.append(temp-1)
    d=findpair(c3.pre,c3.predirection,c2,True)
    if isinstance(c3.predistance[d],list):
     c3.predistance[d].append(temp)
    else:
     c3.predistance[d]+=1
 # next False
#  if cl2+cl3 -1 <= insertsize:
  if (temp12>0) and (temp34<=0):
   temp =1+ insertsize- (cl2+cl3-1)
   d=findpair(c2.next,c2.nextdirection,c3,False)
   if d== -1:
    c2.next.append(c3)
    c2.nextdirection.append(False)
    c2.nextdistance.append([temp])
    c3.next.append(c2)
    c3.nextdirection.append(False)
    c3.nextdistance.append([temp])
   else:
    if isinstance(c2.nextdistance[d],list):
     c2.nextdistance[d].append(temp)
    else:
     c2.nextdistance[d]+=1
     adjust.append(temp-1)
    d=findpair(c3.next,c3.nextdirection,c2,False)
    if isinstance(c3.nextdistance[d],list):
     c3.nextdistance[d].append(temp)
    else:
     c3.nextdistance[d]+=1
# pre True
#  if cp2+cl3 <= insertsize:
  if (temp12<=0) and (temp34<=0):
   temp =1+ insertsize- (cp2+cl3)
   d=findpair(c2.pre,c2.predirection,c3,True)
   if d== -1:
    c2.pre.append(c3)
    c2.predirection.append(True)
    c2.predistance.append([temp])
    c3.next.append(c2)
    c3.nextdirection.append(True)
    c3.nextdistance.append([temp])
   else:
    if isinstance(c2.predistance[d],list):
     c2.predistance[d].append(temp)
    else:
     c2.predistance[d]+=1
     adjust.append(temp-1)
    d=findpair(c3.next,c3.nextdirection,c2,True)
    if isinstance(c3.nextdistance[d],list):
     c3.nextdistance[d].append(temp)
    else:
     c3.nextdistance[d]+=1
# pre False
#  if cp2+cp3 +1<= insertsize:
  if (temp12<=0) and (temp34>0):
   temp =1+ insertsize- (cp2+cp3+1)
   d=findpair(c2.pre,c2.predirection,c3,False)
   if d== -1:
    c2.pre.append(c3)
    c2.predirection.append(False)
    c2.predistance.append([temp])
    c3.pre.append(c2)
    c3.predirection.append(False)
    c3.predistance.append([temp])
   else:
    if isinstance(c2.predistance[d],list):
     c2.predistance[d].append(temp)
    else:
     c2.predistance[d]+=1
     adjust.append(temp-1)
    d=findpair(c3.pre,c3.predirection,c2,False)
    if isinstance(c3.predistance[d],list):
     c3.predistance[d].append(temp)
    else:
     c3.predistance[d]+=1
 if len(adjust):
#  adjust=reportlist(adjust)
  adjust=0
#  adjust=medium(adjust)
 else:
  adjust=0
  print 'adjust:', adjust
 print 'insertsize:',insertsize
 globals()['insertsize']-=adjust
 print 'new insertsize:',insertsize
 contigpool=setinsertlink(contigpool,adjust)
 contigpool = [removetransitivemethod(contigtemp) for contigtemp in contigpool]
 contigpool=insertcontig(contigpool)
 return contigpool

# Collapse every mate-pair link's list of distance samples into one distance,
# dropping weakly supported links.
#
# A link survives when it has at least s_number samples and its median
# distance (medium()) is greater than -insertstd.  A surviving median at or
# below k_mer-4 is replaced by the result of an overlap test; a result of 0,
# or a distance in (k_mer-4, k_mer], becomes k_mer+1.
# The `insertsize` parameter is not used in this function.
# Returns contigpool (modified in place).
def setinsertlink(contigpool,insertsize,insertstd):
 window=k_mer-4
 lowest=-insertstd
 for contig in contigpool:
  # Backwards, so that pop(slot) leaves the unvisited indices untouched.
  for slot in reversed(range(len(contig.predistance))):
   samples=contig.predistance[slot]
   gap=medium(samples)
   if len(samples) < s_number or not (gap > lowest):
    contig.predistance.pop(slot)
    contig.pre.pop(slot)
    contig.predirection.pop(slot)
    continue
   if gap<=window:
    gap=overlappre(contig,contig.pre[slot],contig.predirection[slot],window)
   if gap==0 or (gap>window and gap<=k_mer):
    gap=k_mer+1
   contig.predistance[slot]=gap
  for slot in reversed(range(len(contig.nextdistance))):
   samples=contig.nextdistance[slot]
   gap=medium(samples)
   if len(samples) < s_number or not (gap > lowest):
    contig.nextdistance.pop(slot)
    contig.next.pop(slot)
    contig.nextdirection.pop(slot)
    continue
   if gap<=window:
    # NOTE(review): unlike the overlappre call above, no window is passed
    # here, so overlapnext runs with its default dis=1 -- confirm intended.
    gap=overlapnext(contig,contig.next[slot],contig.nextdirection[slot])
   if gap==0 or (gap>window and gap<=k_mer):
    gap=k_mer+1
   contig.nextdistance[slot]=gap
 return contigpool

# Collapse every mate-pair link's list of samples into the value returned by
# medium(), dropping weakly supported links.
#
# A link survives when it has at least c_number samples and, if the first
# element of its medium() result is <= 1, the two contigs pass the overlap
# test (overlappre / overlapnext).  Surviving entries are replaced by the
# medium() result; all other links are removed.
# NOTE(review): the medium() result is indexed with [0] here while other
# callers treat it as a plain number -- confirm which form is current.
# Returns contigpool (modified in place).
def setmatelink(contigpool):
 cond=1
 for contig in contigpool:
  # Backwards, so that pop(slot) leaves the unvisited indices untouched.
  for slot in reversed(range(len(contig.predistance))):
   samples=contig.predistance[slot]
   if len(samples) >= c_number:
    summary=medium(samples)
    rejected=summary[0]<=cond and not overlappre(contig,contig.pre[slot],contig.predirection[slot])
    if not rejected:
     contig.predistance[slot]=summary
     continue
   contig.predistance.pop(slot)
   contig.pre.pop(slot)
   contig.predirection.pop(slot)
  for slot in reversed(range(len(contig.nextdistance))):
   samples=contig.nextdistance[slot]
   if len(samples) >= c_number:
    summary=medium(samples)
    rejected=summary[0]<=cond and not overlapnext(contig,contig.next[slot],contig.nextdirection[slot])
    if not rejected:
     contig.nextdistance[slot]=summary
     continue
   contig.nextdistance.pop(slot)
   contig.next.pop(slot)
   contig.nextdirection.pop(slot)
 return contigpool

# Collapse every mate-pair link's distance entry into one distance, dropping
# weakly supported links.
#
# A distance entry is either a list of distance samples or a plain number
# used as a support counter:
#  - list: kept when it has at least c_number samples and its median
#    (medium()) minus `adjust` is positive; the entry becomes that value;
#  - counter: kept when it is >= c_number; the entry becomes 1.
# Every other link is removed.
# Returns contigpool (modified in place).
def setinsertlink_s(contigpool,adjust):
 for temp in contigpool:
  # Walk backwards so that pop(i) does not disturb the indices still to visit.
  for i in range(len(temp.predistance)-1,-1,-1):
   entry=temp.predistance[i]
   # Decide on the type first: a short list must never reach the counter
   # test, where Python 2 would evaluate "list >= int" as True (keeping an
   # under-supported link) and Python 3 would raise TypeError.
   if isinstance(entry,list):
    if len(entry) >= c_number:
     temptemp=medium(entry) - adjust
     if temptemp>0:
      temp.predistance[i]=temptemp
      continue
   elif entry >= c_number:
    temp.predistance[i]=1
    continue
   temp.predistance.pop(i)
   temp.pre.pop(i)
   temp.predirection.pop(i)
  for i in range(len(temp.nextdistance)-1,-1,-1):
   entry=temp.nextdistance[i]
   if isinstance(entry,list):
    if len(entry) >= c_number:
     temptemp=medium(entry) - adjust
     if temptemp>0:
      temp.nextdistance[i]=temptemp
      continue
   elif entry >= c_number:
    temp.nextdistance[i]=1
    continue
   temp.nextdistance.pop(i)
   temp.next.pop(i)
   temp.nextdirection.pop(i)
 return contigpool

# Tell whether contigtemp can be joined unambiguously with its successor.
#
# True only when contigtemp has exactly one next link, the linked contig has
# exactly one link on the facing side (its pre list for a same-orientation
# link, its next list otherwise) and the linked contig is not contigtemp
# itself.  Returns False in every other case.
def checkcontignext(contigtemp):
 if len(contigtemp.next) != 1:
  return False
 successor=contigtemp.next[0]
 if contigtemp.nextdirection[0]:
  facing=successor.pre
 else:
  facing=successor.next
 return len(facing)==1 and successor is not contigtemp

# Tell whether contigtemp can be joined unambiguously with its predecessor.
#
# True only when contigtemp has exactly one pre link, the linked contig has
# exactly one link on the facing side (its next list for a same-orientation
# link, its pre list otherwise) and the linked contig is not contigtemp
# itself.  Returns False in every other case.
def checkcontigpre(contigtemp):
 if len(contigtemp.pre) != 1:
  return False
 predecessor=contigtemp.pre[0]
 if contigtemp.predirection[0]:
  facing=predecessor.next
 else:
  facing=predecessor.pre
 return len(facing)==1 and predecessor is not contigtemp

# Test whether the last k_mer-1 bases of c1 match the first k_mer-1 bases of
# its successor c2 (c2.sequence when `dir` is true, c2.reverse otherwise),
# using withindistance().
#
# With dis == 1 the withindistance() result for tolerance p_mer is returned
# as is.  Otherwise the test is run for steps 1..dis with tolerance
# ((p_mer+1)*(k_mer-1))/k_mer and the first step that succeeds is returned,
# or 0 when none does.
# NOTE(review): the step number does not enter the comparison, so every step
# repeats the same test; presumably the compared length was meant to shrink
# with the step -- confirm before relying on the returned distance.
def overlapnext(c1,c2,dir,dis=1):
 span=k_mer-1
 def probe(tolerance):
  # Built on demand so that nothing is read from c1/c2 when no test runs.
  if dir:
   return withindistance(c1.sequence[-span:],c2.sequence[:span],tolerance)
  return withindistance(c1.sequence[-span:],c2.reverse[:span],tolerance)
 if dis == 1:
  return probe(p_mer)
 for step in range(1,dis+1):
  if probe(((p_mer+1)*span)/k_mer):
   return step
 return 0

# Test whether the last k_mer-1 bases of the predecessor c2 (c2.sequence when
# `dir` is true, c2.reverse otherwise) match the first k_mer-1 bases of c1,
# using withindistance().
#
# With dis == 1 the withindistance() result for tolerance p_mer is returned
# as is.  Otherwise the test is run for steps 1..dis with tolerance
# ((p_mer+1)*(k_mer-1))/k_mer and the first step that succeeds is returned,
# or 0 when none does.
# NOTE(review): the step number does not enter the comparison, so every step
# repeats the same test; presumably the compared length was meant to shrink
# with the step -- confirm before relying on the returned distance.
def overlappre(c1,c2,dir,dis=1):
 span=k_mer-1
 def probe(tolerance):
  # Built on demand so that nothing is read from c1/c2 when no test runs.
  if dir:
   return withindistance(c2.sequence[-span:],c1.sequence[:span],tolerance)
  return withindistance(c2.reverse[-span:],c1.sequence[:span],tolerance)
 if dis == 1:
  return probe(p_mer)
 for step in range(1,dis+1):
  if probe(((p_mer+1)*span)/k_mer):
   return step
 return 0

# Merge chains of unambiguously linked contigs, filling gaps with the
# sequence stored on each link.
#
# Here every *distance entry is indexed as a pair: [0] is read as the
# distance and [1] as the gap sequence.  Each contig still marked fresh is
# extended along its next side and then along its pre side for as long as
# checkcontignext()/checkcontigpre() report an unambiguous neighbour.
# Contigs returned by extendnext()/extendpre() are marked not fresh, so they
# are skipped when the outer loop reaches them.
# extendnext()/extendpre() are defined elsewhere; from their use here they
# return None or a (contig, samedirection) pair -- presumably the contig that
# was just absorbed; TODO confirm.
# Returns the list of contigs that were processed as chain starts.
def matecontig(contigpool):
 pool=[]
 for   contigtemp in contigpool:
  if not contigtemp.fresh:
   continue
  if checkcontignext(contigtemp):
   tempsequence=contigtemp.nextdistance[0][1]
   contigtemp.nextdistance=contigtemp.nextdistance[0][0]
   # A passing overlap test overrides the estimated distance with 1.
   if overlapnext(contigtemp,contigtemp.next[0],contigtemp.nextdirection[0]):
    contigtemp.nextdistance=1
   # temp > 0: the distance exceeds k_mer, so the gap is filled explicitly.
   temp = contigtemp.nextdistance- k_mer
   if temp >0:
    contigtemp.nextdistance=k_mer
    # One placeholder node per gap position.
    for i in range(temp):
     contigtemp.nodelist.append( [0,None,None])
    contigtemp.sequence = contigtemp.sequence + tempsequence
    contigtemp.reverse =getreverse(tempsequence) + contigtemp.reverse
   elif contigtemp.nextdistance>1:
    print 'insert ',contigtemp.nextdistance
   temp=contigtemp.extendnext(contigtemp,True,contigtemp.nextdistance)
   while temp is not None:
    (current,samedirection)=temp
    current.fresh=False
    if checkcontignext(contigtemp):
     # Link data of the onward link is read from `current`: its next side
     # when it has the same orientation, its pre side (with the sequence
     # passed through getreverse) otherwise.
     if samedirection:
      tempsequence=current.nextdistance[0][1]
      distance=current.nextdistance[0][0]
     else:
      tempsequence=getreverse(current.predistance[0][1])
      distance=current.predistance[0][0]
     if overlapnext(contigtemp,contigtemp.next[0],contigtemp.nextdirection[0]):
      distance=1
    else:
     distance=1
    temp = distance- k_mer
    if temp >0:
     distance=k_mer
     for i in range(temp):
      contigtemp.nodelist.append( [0,None,None])
     contigtemp.sequence = contigtemp.sequence + tempsequence
     contigtemp.reverse =getreverse(tempsequence)+ contigtemp.reverse
    elif distance>1:
     print 'insert ',distance
    temp=contigtemp.extendnext(current,samedirection, distance)
  # Same procedure towards the pre side; gap sequence and placeholder nodes
  # are added at the front.
  if checkcontigpre(contigtemp):
   tempsequence=contigtemp.predistance[0][1]
   contigtemp.predistance=contigtemp.predistance[0][0]
   if overlappre(contigtemp,contigtemp.pre[0],contigtemp.predirection[0]):
    contigtemp.predistance=1
   temp = contigtemp.predistance- k_mer
   if temp >0:
    contigtemp.predistance=k_mer
    for i in range(temp):
     contigtemp.nodelist.insert(0, [0,None,None])
    contigtemp.sequence = tempsequence + contigtemp.sequence
    contigtemp.reverse = contigtemp.reverse + getreverse(tempsequence)
   elif contigtemp.predistance>1:
    print 'insert ',contigtemp.predistance
   temp=contigtemp.extendpre(contigtemp,True,contigtemp.predistance)
   while temp is not None:
    (current,samedirection)=temp
    current.fresh=False
    if checkcontigpre(contigtemp):
     if samedirection:
      tempsequence=current.predistance[0][1]
      distance=current.predistance[0][0]
     else:
      tempsequence=getreverse(current.nextdistance[0][1])
      distance=current.nextdistance[0][0]
     if overlappre(contigtemp,contigtemp.pre[0],contigtemp.predirection[0]):
      distance=1
    else:
     distance=1
    temp = distance- k_mer
    if temp >0:
     distance=k_mer
     for i in range(temp):
      contigtemp.nodelist.insert(0, [0,None,None])
     contigtemp.sequence = tempsequence + contigtemp.sequence
     contigtemp.reverse = contigtemp.reverse +getreverse(tempsequence)
    elif distance>1:
     print 'insert ',distance
    temp=contigtemp.extendpre(current,samedirection, distance)
  contigtemp.fresh=False
  # Weight: contigtemp.sum divided by the number of k-mer positions.
  contigtemp.weight=contigtemp.sum/float(len(contigtemp.sequence)+1-k_mer)
  pool.append(contigtemp)
 return pool

# Merge chains of unambiguously linked contigs, padding gaps with 'N'.
#
# Here every *distance entry is a plain distance value.  Each contig still
# marked fresh is extended along its next side and then along its pre side
# for as long as checkcontignext()/checkcontigpre() report an unambiguous
# neighbour.  Contigs returned by extendnext()/extendpre() are marked not
# fresh, so they are skipped when the outer loop reaches them.
# extendnext()/extendpre() are defined elsewhere; from their use here they
# return None or a (contig, samedirection) pair -- presumably the contig that
# was just absorbed; TODO confirm.
# Returns the list of contigs that were processed as chain starts.
def insertcontig(contigpool):
 pool=[]
 for   contigtemp in contigpool:
  if not contigtemp.fresh:
   continue
  if checkcontignext(contigtemp):
   contigtemp.nextdistance=contigtemp.nextdistance[0]
   # A passing overlap test overrides the estimated distance with 1.
   if overlapnext(contigtemp,contigtemp.next[0],contigtemp.nextdirection[0]):
    contigtemp.nextdistance=1
   # temp > 0: the distance exceeds k_mer, so temp positions are padded.
   temp = contigtemp.nextdistance- k_mer
   if temp >0:
    contigtemp.nextdistance=k_mer
    # One placeholder node and one 'N' per gap position.
    for i in range(temp):
     contigtemp.nodelist.append( [0,None,None])
    contigtemp.sequence = contigtemp.sequence + ('N')*temp
    contigtemp.reverse =('N')*temp + contigtemp.reverse
   elif contigtemp.nextdistance>1:
    print 'insert ',contigtemp.nextdistance
   temp=contigtemp.extendnext(contigtemp,True,contigtemp.nextdistance)
   while temp is not None:
    (current,samedirection)=temp
    current.fresh=False
    if checkcontignext(contigtemp):
     # Distance of the onward link is read from `current`: its next side
     # when it has the same orientation, its pre side otherwise.
     if samedirection:
      distance=current.nextdistance[0]
     else:
      distance=current.predistance[0]
     if overlapnext(contigtemp,contigtemp.next[0],contigtemp.nextdirection[0]):
      distance=1
    else:
     distance=1
    temp = distance- k_mer
    if temp >0:
     distance=k_mer
     for i in range(temp):
      contigtemp.nodelist.append( [0,None,None])
     contigtemp.sequence = contigtemp.sequence + ('N')*temp
     contigtemp.reverse =('N')*temp+ contigtemp.reverse
    elif distance>1:
     print 'insert ',distance
    temp=contigtemp.extendnext(current,samedirection, distance)
  # Same procedure towards the pre side; padding and placeholder nodes are
  # added at the front.
  if checkcontigpre(contigtemp):
   contigtemp.predistance=contigtemp.predistance[0]
   if overlappre(contigtemp,contigtemp.pre[0],contigtemp.predirection[0]):
    contigtemp.predistance=1
   temp = contigtemp.predistance- k_mer
   if temp >0:
    contigtemp.predistance=k_mer
    for i in range(temp):
     contigtemp.nodelist.insert(0, [0,None,None])
    contigtemp.sequence = ('N')*temp + contigtemp.sequence
    contigtemp.reverse = contigtemp.reverse + ('N')*temp
   elif contigtemp.predistance>1:
    print 'insert ',contigtemp.predistance
   temp=contigtemp.extendpre(contigtemp,True,contigtemp.predistance)
   while temp is not None:
    (current,samedirection)=temp
    current.fresh=False
    if checkcontigpre(contigtemp):
     if samedirection:
      distance=current.predistance[0]
     else:
      distance=current.nextdistance[0]
     if overlappre(contigtemp,contigtemp.pre[0],contigtemp.predirection[0]):
      distance=1
    else:
     distance=1
    temp = distance- k_mer
    if temp >0:
     distance=k_mer
     for i in range(temp):
      contigtemp.nodelist.insert(0, [0,None,None])
     contigtemp.sequence = ('N')*temp + contigtemp.sequence
     contigtemp.reverse = contigtemp.reverse +('N')*temp
    elif distance>1:
     print 'insert ',distance
    temp=contigtemp.extendpre(current,samedirection, distance)
  contigtemp.fresh=False
  # Weight: contigtemp.sum divided by the number of k-mer positions.
  contigtemp.weight=contigtemp.sum/float(len(contigtemp.sequence)+1-k_mer)
  pool.append(contigtemp)
 return pool

# Older mate-pair scaffolding routine: give every contig end at most one
# link, always keeping the shortest one seen, then fuse the contigs.
#
# nodepool is consumed two entries at a time; each entry's `contiginfo` is
# unpacked as (contig, position, direction flag).  The expected pair length
# comes from estimatepairlength().  Every contig's nextdistance/predistance
# starts at `nodelength`, which doubles as the "no link yet" marker.
# The pre/next lists themselves are not reset here.
#
# For a pair on two different contigs of the pool the implied distance is
# pairlength minus the part covered on both contigs.  The link is taken only
# if that distance is smaller than the distances currently stored on both
# contig ends involved; distances below k_mer must additionally be confirmed
# by inoverlap().  A link already stored on an end is replaced, after the
# last entry of the old partner's facing list has been popped.
# Finally fusecontig() is applied.  Returns its result.
def matepair_old(contigpool,nodepool, nodelength=100):
 # Pool of 'N' placeholders from which gap sequences are sliced.
 nsequence=['N']*nodelength
 contigset= set(contigpool)
 pairlength= estimatepairlength(contigpool,nodepool)
 for temp in contigpool:
  temp.fresh=True
  temp.nextdistance=nodelength
  temp.predistance=nodelength
 for i in range(0,len(nodepool),2):
  (c1,cp1,cd1)=nodepool[i].contiginfo
  if c1 not in contigset:
   continue
  (c2,cp2,cd2)=nodepool[i+1].contiginfo
  if c2 not in contigset or c1 == c2:
   continue
  # cl1/cl2: number of k-mer positions from cp1/cp2 to the end of the contig.
  cl1=len(c1.sequence) +1 - k_mer-cp1
  cl2=len(c2.sequence) +1 -k_mer-cp2
# next true
#  if cl1< nodelength:
#   if cp2 < nodelength:
#  if cl1+cp2 < nodelength:
  # Both flags 1: c2 follows c1 in the same orientation.
  if (cd1==1) and (cd2==1):
   temp=cl1+cp2
   if temp >= pairlength:
    continue
   temp = pairlength-temp   
   # Only a link shorter than what both ends already have is accepted.
   if (temp < c1.nextdistance) and (temp < c2.predistance):
    # otemp > 0: the contigs would overlap by otemp bases; verify it.
    otemp=k_mer-temp
    if otemp >0:
     otemp=inoverlap(c1.sequence,c2.sequence,otemp)
     if otemp>0:
      temp=k_mer-otemp
     else:
      continue
    if c1.nextdistance == nodelength:
     c1.next.append(c2)
     c1.nextdirection.append(True)
    else:
     # Replace c1's current next link and detach its old partner.
     c1n=c1.next[0]
     if c1.nextdirection[0]:
#      if c1n.pre[0] ==c1:
      c1n.pre.pop()
      c1n.predirection.pop()
      c1n.predistance=nodelength
     else:
      c1n.next.pop()
      c1n.nextdirection.pop()
      c1n.nextdistance=nodelength
     c1.next[0] = c2
     c1.nextdirection[0] = True
    if c2.predistance == nodelength:
     c2.pre.append(c1)
     c2.predirection.append(True)
    else:
     # Replace c2's current pre link and detach its old partner.
     c2p=c2.pre[0]
     if c2.predirection[0]:
      c2p.next.pop()
      c2p.nextdirection.pop()
      c2p.nextdistance=nodelength
     else:
      c2p.pre.pop()
      c2p.predirection.pop()
      c2p.predistance=nodelength
     c2.pre[0]=c1
     c2.predirection[0]=True
    c1.nextdistance=temp
    c2.predistance=temp
    # Placeholder gap sequence; empty when temp <= k_mer.
    c1.nextsequence=nsequence[k_mer:temp]
    c2.presequence =nsequence[k_mer:temp]
 # next False
#  if cl1+cl2 -1 < nodelength:
  # Flags 1 and 0: the contigs are joined through their next ends with
  # opposite orientation.
  if (cd1==1) and (cd2==0):
   temp=cl1+cl2 -1
   if temp >= pairlength:
    continue
   temp = pairlength-temp   
   if (temp < c1.nextdistance) and (temp < c2.nextdistance):
    otemp=k_mer-temp
    if otemp >0:
     otemp=inoverlap(c1.sequence,c2.reverse,otemp)
     if otemp>0:
      temp=k_mer-otemp
     else:
      continue
    if c1.nextdistance == nodelength:
     c1.next.append(c2)
     c1.nextdirection.append(False)
    else:
     c1n=c1.next[0]
     if c1.nextdirection[0]:
#      if c1n.pre[0] ==c1:
      c1n.pre.pop()
      c1n.predirection.pop()
      c1n.predistance=nodelength
     else:
      c1n.next.pop()
      c1n.nextdirection.pop()
      c1n.nextdistance=nodelength
     c1.next[0] = c2
     c1.nextdirection[0] = False
    if c2.nextdistance == nodelength:
     c2.next.append(c1)
     c2.nextdirection.append(False)
    else:
     c2n=c2.next[0]
     if c2.nextdirection[0]:
      c2n.pre.pop()
      c2n.predirection.pop()
      c2n.predistance=nodelength
     else:
      c2n.next.pop()
      c2n.nextdirection.pop()
      c2n.nextdistance=nodelength
     c2.next[0]=c1
     c2.nextdirection[0]=False
    c1.nextdistance=temp
    c2.nextdistance=temp
    c1.nextsequence=nsequence[k_mer:temp]
    c2.nextsequence =nsequence[k_mer:temp]
# pre True
#  if cp1+cl2 < nodelength:
  # Both flags 0: c2 precedes c1 in the same orientation.
  if (cd1==0) and (cd2==0):
   temp=cp1+cl2 
   if temp >= pairlength:
    continue
   temp = pairlength-temp   
   if (temp < c1.predistance) and (temp < c2.nextdistance):
    otemp=k_mer-temp
    if otemp >0:
     otemp=inoverlap(c2.sequence,c1.sequence,otemp)
     if otemp>0:
      temp=k_mer-otemp
     else:
      continue
    if c1.predistance == nodelength:
     c1.pre.append(c2)
     c1.predirection.append(True)
    else:
     c1p=c1.pre[0]
     if c1.predirection[0]:
#      if c1p.next[0] ==c1:
      c1p.next.pop()
      c1p.nextdirection.pop()
      c1p.nextdistance=nodelength
     else:
      c1p.pre.pop()
      c1p.predirection.pop()
      c1p.predistance=nodelength
     c1.pre[0] = c2
     c1.predirection[0] = True
    if c2.nextdistance == nodelength:
     c2.next.append(c1)
     c2.nextdirection.append(True)
    else:
     c2n=c2.next[0]
     if c2.nextdirection[0]:
      c2n.pre.pop()
      c2n.predirection.pop()
      c2n.predistance=nodelength
     else:
      c2n.next.pop()
      c2n.nextdirection.pop()
      c2n.nextdistance=nodelength
     c2.next[0]=c1
     c2.nextdirection[0]=True
    c1.predistance=temp
    c2.nextdistance=temp
    c2.nextsequence = nsequence[k_mer:temp]
    c1.presequence=nsequence[k_mer:temp]
# pre False
#  if cp1+cp2 +1< nodelength:
  # Flags 0 and 1: the contigs are joined through their pre ends with
  # opposite orientation.
  if (cd1==0) and (cd2==1):
   temp=cp1+cp2 +1
   if temp >= pairlength:
    continue
   temp = pairlength-temp   
   if (temp < c1.predistance) and (temp < c2.predistance):
    otemp=k_mer-temp
    if otemp >0:
     otemp=inoverlap(c2.reverse,c1.sequence,otemp)
     if otemp>0:
      temp=k_mer-otemp
     else:
      continue
    if c1.predistance == nodelength:
     c1.pre.append(c2)
     c1.predirection.append(False)
    else:
     c1p=c1.pre[0]
     if c1.predirection[0]:
#      if c1p.next[0] ==c1:
      c1p.next.pop()
      c1p.nextdirection.pop()
      c1p.nextdistance=nodelength
     else:
      c1p.pre.pop()
      c1p.predirection.pop()
      c1p.predistance=nodelength
     c1.pre[0] = c2
     c1.predirection[0] = False
    if c2.predistance == nodelength:
     c2.pre.append(c1)
     c2.predirection.append(False)
    else:
     c2p=c2.pre[0]
     if c2.predirection[0]:
      c2p.next.pop()
      c2p.nextdirection.pop()
      c2p.nextdistance=nodelength
     else:
      c2p.pre.pop()
      c2p.predirection.pop()
      c2p.predistance=nodelength
     c2.pre[0]=c1
     c2.predirection[0]=False
    c1.predistance=temp
    c2.predistance=temp
    c2.presequence = nsequence[k_mer:temp]
    c1.presequence= nsequence[k_mer:temp]
 # fusecontig
 contigpool = fusecontig(contigpool, nodelength)
 return contigpool

def checkrepeat(contigtemp):
 """Try to resolve contigtemp as a repeat by folding it into its neighbours.

 The function inspects the pre/next links of contigtemp and, when one of
 the recognised patterns matches, copies contigtemp's sequence (minus the
 k_mer-1 overlap) onto the neighbouring contig(s), pads or extends their
 nodelist, and updates their sum and weight.  It does not touch any link
 lists; the visible caller (arrangerepeat) removes contigtemp from the pool
 and rebuilds the links afterwards.

 Patterns, tested in this order:
  * next stem  - both next links point at the same contig with opposite
                 directions (skipped when repeattype >= 2)
  * pre stem   - the same situation on the pre side
  * small wheel- exactly one contig is shared by pre and next (two links on
                 each side) and it is linked with the same direction
  * big wheel  - the shared contig is contigtemp itself (a self loop)
  * pre/next repeat - general case, only tried when repeattype < 1

 All weight tests are scaled by the module-level repeatparameter.

 Returns True when contigtemp was folded into a neighbour, otherwise False.
 """
 lp=len(contigtemp.pre)
 ln=len(contigtemp.next)
 # fewer than two links on both sides: nothing to resolve
 if (lp<2) and (ln<2):
  return False
 if ln==2 and contigtemp.next[0]is contigtemp.next[1] and contigtemp.nextdirection[0] != contigtemp.nextdirection[1] and contigtemp.weight >= (contigtemp.next[0].weight*2*repeatparameter):
  if repeattype >=2:
   return False
#  print 'next stem:'
  kmm=k_mer-1
  contignext=contigtemp.next[0]
  # wrap the next contig with contigtemp on the left and its reverse on the right
  contignext.sequence= contigtemp.sequence[:-kmm] +contignext.sequence+contigtemp.reverse[kmm:]
  contignext.reverse= contigtemp.sequence[:-kmm] +contignext.reverse+contigtemp.reverse[kmm:]
  nextnodelist=contignext.nodelist
  # one placeholder entry per added position, on both ends
  for i in range(len(contigtemp.sequence)-kmm):
   nextnodelist.insert( 0,[0,None,None])
   nextnodelist.append([0,None,None])
  contignext.sum+=contigtemp.sum
  contignext.weight=contignext.sum/float(len(contignext.sequence) -kmm)
  return True
 if lp==2 and contigtemp.pre[0]is contigtemp.pre[1] and contigtemp.predirection[0] != contigtemp.predirection[1] and contigtemp.weight >= (contigtemp.pre[0].weight*2*repeatparameter):
  if repeattype >=2:
   return False
#  print 'pre stem:'
  kmm=k_mer-1
  contigpre=contigtemp.pre[0]
  # mirror image of the next-stem case: reverse on the left, sequence on the right
  contigpre.sequence= contigtemp.reverse[:-kmm] +contigpre.sequence+contigtemp.sequence[kmm:]
  contigpre.reverse= contigtemp.reverse[:-kmm] +contigpre.reverse+contigtemp.sequence[kmm:]
  prenodelist=contigpre.nodelist
  for i in range(len(contigtemp.sequence)-kmm):
   prenodelist.insert( 0,[0,None,None])
   prenodelist.append([0,None,None])
  contigpre.sum+=contigtemp.sum
  contigpre.weight=contigpre.sum/float(len(contigpre.sequence) -kmm)
  return True
 # contigs that appear both before and after contigtemp
 intercontig= set(contigtemp.pre) & set(contigtemp.next)
 if lp==2 and ln==2 and len(intercontig) ==1:
  intercontig= list(intercontig)[0]
  preindex= contigtemp.pre.index(intercontig)
  nextindex= contigtemp.next.index(intercontig)
  if intercontig is not contigtemp and contigtemp.predirection[preindex] == contigtemp.nextdirection[nextindex] and contigtemp.weight >= (intercontig.weight+ contigtemp.pre[1-preindex].weight)*repeatparameter and contigtemp.weight >= (intercontig.weight+ contigtemp.next[1-nextindex].weight)*repeatparameter:
#   print 'small wheel:'
   kmm=k_mer-1
   # put a copy of contigtemp on both sides of the shared contig,
   # oriented according to the link direction
   if contigtemp.predirection[preindex]:
    intercontig.sequence= contigtemp.sequence[:-kmm] +intercontig.sequence+contigtemp.sequence[kmm:]
    intercontig.reverse= contigtemp.reverse[:-kmm] +intercontig.reverse+contigtemp.reverse[kmm:]
   else:
    intercontig.sequence= contigtemp.reverse[:-kmm] +intercontig.sequence+contigtemp.reverse[kmm:]
    intercontig.reverse= contigtemp.sequence[:-kmm] +intercontig.reverse+contigtemp.sequence[kmm:]
   internodelist=intercontig.nodelist
   for i in range(len(contigtemp.sequence)-kmm):
    internodelist.insert( 0,[0,None,None])
    internodelist.append([0,None,None])
   intercontig.sum+=contigtemp.sum
   intercontig.weight=intercontig.sum/float(len(intercontig.sequence) -kmm)
   return True
  if intercontig is contigtemp and contigtemp.predirection[preindex] == True and contigtemp.nextdirection[nextindex] == True:
#and contigtemp.weight >= (intercontig.weight+ contigtemp.pre[1-preindex].weight)*repeatparameter and contigtemp.weight >= (intercontig.weight+ contigtemp.next[1-nextindex].weight)*repeatparameter:
#   print 'big wheel:'
   kmm=k_mer-1
   # contigtemp loops onto itself: attach it once to its other predecessor
   contigpre=contigtemp.pre[1-preindex]
   if contigtemp.predirection[1-preindex]:
    contigpre.sequence=contigpre.sequence+contigtemp.sequence[kmm:]
    contigpre.reverse= contigtemp.reverse[:-kmm] +contigpre.reverse
    contigpre.nodelist.extend(contigtemp.nodelist)
   else:
    contigpre.sequence= contigtemp.reverse[:-kmm] +contigpre.sequence
    contigpre.reverse=contigpre.reverse+contigtemp.sequence[kmm:]
    # note: reverses contigtemp.nodelist in place before prepending it
    contigtemp.nodelist.reverse()
    contigpre.nodelist[:0]=contigtemp.nodelist
   contigpre.sum+=contigtemp.sum
   contigpre.weight=contigpre.sum/float(len(contigpre.sequence) -kmm)
   return True
 # the general repeat cases below are disabled for repeattype >= 1
 if repeattype >=1:
  return False
 presumcond=(lp> 1) and (contigtemp.weight >= sum([temp.weight for temp in contigtemp.pre ])*repeatparameter)
 nextsumcond=(ln> 1) and (contigtemp.weight >= sum([temp.weight for temp in contigtemp.next ])*repeatparameter)
 if presumcond and (lp >= ln or not(nextsumcond)):
#  print 'pre repeat:'
  kmm=k_mer-1
  # a self link is skipped below, so it must not take a share of the sum
  if contigtemp in contigtemp.pre:
   lp-=1
  for i in range(len(contigtemp.pre)):
   contigpre=contigtemp.pre[i]
   if contigpre is contigtemp:
    continue
   prenodelist=contigpre.nodelist
   # the inner loops reuse the name i; the outer range iterator is unaffected
   if contigtemp.predirection[i]:
    contigpre.sequence=contigpre.sequence+contigtemp.sequence[kmm:]
    contigpre.reverse= contigtemp.reverse[:-kmm] +contigpre.reverse
    for i in range(len(contigtemp.sequence)-kmm):
     prenodelist.append([0,None,None])
   else:
    contigpre.sequence= contigtemp.reverse[:-kmm] +contigpre.sequence
    contigpre.reverse=contigpre.reverse+contigtemp.sequence[kmm:]
    for i in range(len(contigtemp.sequence)-kmm):
     prenodelist.insert( 0,[0,None,None])
   # contigtemp's sum is shared equally between the receiving predecessors
   contigpre.sum+= float(contigtemp.sum)/lp
   contigpre.weight=contigpre.sum/float(len(contigpre.sequence) -kmm)
  return True
 if nextsumcond and (ln >= lp or not(presumcond)):
#  print 'next repeat:'
  kmm=k_mer-1
  # a self link is skipped below, so it must not take a share of the sum
  if contigtemp in contigtemp.next:
   ln-=1
  for i in range(len(contigtemp.next)):
   contignext=contigtemp.next[i]
   if contignext is contigtemp:
    continue
   nextnodelist=contignext.nodelist
   if contigtemp.nextdirection[i]:
    contignext.sequence= contigtemp.sequence[:-kmm] +contignext.sequence
    contignext.reverse=contignext.reverse+contigtemp.reverse[kmm:]
    for i in range(len(contigtemp.sequence)-kmm):
     nextnodelist.insert( 0,[0,None,None])
   else:
    contignext.sequence=contignext.sequence+contigtemp.reverse[kmm:]
    contignext.reverse= contigtemp.sequence[:-kmm] +contignext.reverse
    for i in range(len(contigtemp.sequence)-kmm):
     nextnodelist.append([0,None,None])
   # contigtemp's sum is shared equally between the receiving successors
   contignext.sum+= float(contigtemp.sum)/ln
   contignext.weight=contignext.sum/float(len(contignext.sequence) -kmm)
  return True
 return False

def arrangerepeat(contigpool):
 """Repeatedly resolve repeats inside one group of contigs.

 Each round sorts the pool (comparator chosen by the module-level
 repeatsort, order by repeatorder), scans it from the last element to the
 first and folds away the first contig for which checkrepeat() succeeds.
 The links are then rebuilt with cleancontig()/extendcontig() and the next
 round starts.  The loop ends when a round does not shrink the pool.

 A pool holding a single contig is returned untouched.
 Returns the (possibly shorter) pool.
 """
 remaining=len(contigpool)
 if remaining == 1:
  return contigpool
 # comparators never report equality: anything not strictly greater sorts first
 def byweight( first,second):
  return 1 if first.weight > second.weight else -1
 def bylinks( first,second):
  firstlinks=len(first.pre) + len(first.next)
  secondlinks=len(second.pre) + len(second.next)
  return 1 if firstlinks > secondlinks else -1
 if repeatsort=='w':
  comparator=byweight
 if repeatsort=='l':
  comparator=bylinks
 previous=remaining+1
 while remaining<previous:
  if repeatorder=='i':
   contigpool.sort(comparator)
  if repeatorder=='d':
   contigpool.sort(comparator,reverse=True)
  previous=remaining
  for index in reversed(range(remaining)):
   if not checkrepeat(contigpool[index]):
    continue
   # the contig was merged into its neighbours: drop it and relink the rest
   contigpool.pop(index)
   contigpool=cleancontig(contigpool)
   contigpool=extendcontig(contigpool)
   remaining = len(contigpool)
   break
 return contigpool

def removerepeat(contigpool):
 """Resolve repeats group by group and return the flattened pool.

 The pool is split into groups with splitcontig(), every group is
 processed by arrangerepeat(), and joincontig() flattens the result.
 """
 groups=splitcontig(contigpool)
 resolved=[]
 for group in groups:
  resolved.append(arrangerepeat(group))
 return joincontig(resolved)

def splitcontig(contigpool):
 """Partition the pool into groups of mutually linked contigs.

 Every contig is first flagged fresh via refreshcontig().  Each contig that
 is still fresh seeds a new group: bfscontig() collects everything reachable
 through pre/next links, or the contig stands alone if it has no links.
 Members of a group are flagged not fresh so they are not visited again.

 Returns a list of groups (each a list of contigs).
 """
 groups=[]
 for seed in refreshcontig(contigpool):
  if not seed.fresh:
   continue
  if len(seed.pre) or len(seed.next):
   group=bfscontig(seed)
   for member in group:
    member.fresh=False
  else:
   seed.fresh=False
   group=[seed]
  groups.append(group)
 return groups

def joincontig(lpool):
 """Flatten a list of contig groups into a single list.

 lpool: iterable of lists of contigs.
 Returns a new list holding the members of every group in order.  Empty
 groups contribute nothing (previously they raised IndexError).
 """
 contigpool=[]
 for pool in lpool:
  contigpool.extend(pool)
 return contigpool

def arrangecontig(contigpool):
 """Return the pool reordered group by group.

 Contigs are grouped exactly as in splitcontig (linked contigs collected
 with bfscontig, unlinked ones alone).  Groups are sorted by member count,
 then by the sequence length of their first member, smaller first.  Each
 multi-member group is emitted in breadth-first order starting from its
 last member; single contigs are emitted as they are.
 """
 contigpool=refreshcontig(contigpool)
 groups=[]
 for seed in contigpool:
  if not seed.fresh:
   continue
  if len(seed.pre) or len(seed.next):
   group=bfscontig(seed)
  else:
   group=[seed]
  for member in group:
   member.fresh=False
  groups.append(group)
 # never reports equality: ties fall through to "greater"
 def bysize( first,second):
  if len(first) != len(second):
   if len(first) < len(second):
    return -1
   return 1
  if len(first[0].sequence) < len(second[0].sequence):
   return -1
  return 1
 groups.sort(bysize)
 ordered=[]
 for group in groups:
  if len(group) >1:
   ordered.extend(bfscontig(group[-1] ))
  else:
   ordered.append(group[0])
 return ordered

def bfscontig(current):
 """Return every contig reachable from current, in breadth-first order.

 Links are followed through both the pre and the next list of each contig
 (pre first).  The starting contig is the first element of the result and
 every contig appears once.

 A set of visited contigs replaces the former list membership test, so the
 traversal no longer rescans the whole result for every link.  Contigs must
 be hashable.
 """
 pool = [current]
 seen = set(pool)
 i=0
 while i < len(pool):
  current=pool[i]
  i+=1
  for temp in current.pre+current.next:
   if temp not in seen:
    seen.add(temp)
    pool.append(temp)
 return pool

def refreshcontig(contigpool):
 """Flag every contig in the pool as fresh (unvisited) and return the pool."""
 for contigtemp in contigpool:
  contigtemp.fresh=True
 return contigpool

def cleancontig(contigpool):
 """Drop all pre/next links (and their directions) of every contig.

 Each of the four attributes receives its own new empty list.
 Returns the pool that was passed in.
 """
 for contigtemp in contigpool:
  contigtemp.pre, contigtemp.predirection = [], []
  contigtemp.next, contigtemp.nextdirection = [], []
 return contigpool

def prunecontig(contigpool):
 """Remove links that lead to contigs outside the pool, then merge.

 Every contig in the pool is flagged fresh; any pre/next link whose target
 has fresh == False is deleted together with its direction entry.  The
 pruned pool is then handed to mergecontig() and its result returned.
 """
 for contigtemp in contigpool:
  contigtemp.fresh=True
 for contigtemp in contigpool:
  sides=((contigtemp.next,contigtemp.nextdirection),(contigtemp.pre,contigtemp.predirection))
  for links,directions in sides:
   # walk backwards so that popping does not shift unvisited positions
   for position in reversed(range(len(links))):
    if links[position].fresh== False:
     links.pop(position)
     directions.pop(position)
 return mergecontig(contigpool)

#def buildcontig(contigpool):
# cl=len(contigpool)
# dg={}
# for i in range(cl):
#  for j in range(2):
#   seed =contigpool[i].contigseedlist[j]
#   seedsequence =seed.getsequence()
#   if seedsequence not in dg:
#    dg[seedsequence] =[]
#   dg[seedsequence].append(seed)
# return dg

#def linkcontig(contigpool,dg,d):
# cl=len(contigpool)
# for i in range(cl):
#  contigtemp=contigpool[i]
#  sequence=contigtemp.sequence[-(k_mer-d):]
#  (contigtemp.next,contigtemp.nextdirection)= getpool( sequence, dg)
#  reverse=contigtemp.reverse[-(k_mer-d):]
#  (contigtemp.pre,contigtemp.predirection)= getpool( reverse, dg)
#  contigtemp.predirection =[ not contigtemp.predirection[i] for i in range(len(contigtemp.predirection))]
# return contigpool

def mergecontig(contigpool,d=1):
 """Greedily extend every fresh contig and return the surviving contigs.

 For each contig still flagged fresh, extendnext() is called repeatedly
 until it returns None, then extendpre() likewise.  Each call returns either
 None or a (contig, samedirection) pair; the returned contig is flagged not
 fresh and passed to the following call.  Afterwards the seed contig's
 weight is recomputed from its sum and sequence length and it is appended
 to the result.

 d is forwarded unchanged to extendnext/extendpre.
 """
 survivors=[]
 for seed in contigpool:
  if not seed.fresh:
   continue
  for methodname in ('extendnext','extendpre'):
   step=getattr(seed,methodname)(seed,True,d)
   while step is not None:
    (absorbed,samedirection)=step
    absorbed.fresh=False
    step=getattr(seed,methodname)(absorbed,samedirection,d)
  seed.fresh=False
  seed.weight=seed.sum/float(len(seed.sequence)+1-k_mer)
  survivors.append(seed)
 return survivors

def showscaffoldstructure(contigpool, infile =None):
 if infile:
  infile=open(infile,'w+')
  infile.write(listscaffold(contigpool))
  infile.close()
 else:
  print listscaffold(contigpool),

def listscaffold(contigpool,sequence=False):
 """Render the pool as text, including per-link distances.

 Contigs are renumbered (id = position + 1) as a side effect.  Each contig
 yields a '>id ... length ... weight ...' header followed by a 'pre' and a
 'next' line listing 'id,direction,distance' for every link.  With
 sequence=True the sequence follows, wrapped at 2000 characters per line.

 Returns '>' for an empty pool, otherwise the text ending in a newline.

 NOTE(review): predistance/nextdistance are indexed per link here, while
 other code in this file assigns plain numbers to them - confirm which
 objects this function is meant to receive.
 """
 # width: maximum number of sequence characters per output line
 width=2000
 if len(contigpool) ==0:
  return '>'
 for position,entry in enumerate(contigpool):
  entry.id=position+1
 lines=[]
 for ci in contigpool:
  s=ci.sequence
  ls=len(s)
  text=''.join(['>id ',str(ci.id),' length ',str(ls),' weight ',str(ci.weight)])
  links=ci.pre
  directions=ci.predirection
  distances=ci.predistance
  text+='\npre'
  for j in range(len(links)):
   text+=' '+str(links[j].id)+','+str(directions[j])+','+str(distances[j])
  links=ci.next
  directions=ci.nextdirection
  distances=ci.nextdistance
  text+='\nnext'
  for j in range(len(links)):
   text+=' '+str(links[j].id)+','+str(directions[j])+','+str(distances[j])
  lines.append(text)
  if sequence:
   if ls<=width:
    lines.append(str(s))
   else:
    pieces=[s[start:start+width] for start in range(0,ls,width)]
    lines.append(str('\n'.join(pieces)))
 return '\n'.join(lines)+'\n'

def showcontig(contigpool, infile =None):
 if infile:
  infile=open(infile,'w+')
  infile.write(listcontig(contigpool,True))
  infile.close()
 else:
  print listcontig(contigpool,True),

def showcontigstructure(contigpool, infile =None):
 if infile:
  infile=open(infile,'w+')
  infile.write(listcontig(contigpool))
  infile.close()
 else:
  print listcontig(contigpool),

def listcontig(contigpool,sequence=False):
 """Render the pool as text: one header plus link lines per contig.

 Contigs are renumbered (id = position + 1) as a side effect.  Each contig
 yields a '>id ... length ... weight ...' header followed by a 'pre' and a
 'next' line listing 'id,direction' for every link.  With sequence=True the
 sequence follows, wrapped at 2000 characters per line.

 Returns '>' for an empty pool, otherwise the text ending in a newline.
 """
 # width: maximum number of sequence characters per output line
 width=2000
 if len(contigpool) ==0:
  return '>'
 for position,entry in enumerate(contigpool):
  entry.id=position+1
 lines=[]
 for ci in contigpool:
  s=ci.sequence
  ls=len(s)
  text=''.join(['>id ',str(ci.id),' length ',str(ls),' weight ',str(ci.weight)])
  links=ci.pre
  directions=ci.predirection
  text+='\npre'
  for j in range(len(links)):
   text+=' '+str(links[j].id)+','+str(directions[j])
  links=ci.next
  directions=ci.nextdirection
  text+='\nnext'
  for j in range(len(links)):
   text+=' '+str(links[j].id)+','+str(directions[j])
  lines.append(text)
  if sequence:
   if ls<=width:
    lines.append(str(s))
   else:
    pieces=[s[start:start+width] for start in range(0,ls,width)]
    lines.append(str('\n'.join(pieces)))
 return '\n'.join(lines)+'\n'

def writefastafile( readlist, infile):
 """Write the sequences in readlist to the file infile in FASTA-like form.

 Every sequence gets a '> id:<index>' header (index counted from 0) and is
 wrapped at 2000 characters per line.

 readlist: sequences to write.
 infile: path of the output file (opened with mode 'w').

 The file is closed by the with-statement even when a write fails.
 """
 lc=2000
 with open(infile,'w') as handle:
  for i,s in enumerate(readlist):
   ls=len(s)
   handle.write('> id:'+str(i)+'\n')
   if ls<=lc:
    handle.write(str(s)+'\n')
   else:
    # cut the sequence into consecutive pieces of at most lc characters
    sl=[s[start:start+lc] for start in range(0,ls,lc)]
    handle.write(str('\n'.join(sl))+'\n')

def writecfafile( infile, contigpool):
 """Write the contigs and their links to infile using CRLF line ends.

 Contigs are renumbered (id = position + 1) as a side effect.  Each header
 line reads
   >id <id> length <len> coverage <ceil(weight)> p <links> n <links>
 where every link is '<id>T' or '<id>F' according to its direction.  The
 sequence follows, wrapped at 60 characters per line.  For an empty pool
 only a single '\\r\\n' is written.

 infile: path of the output file (opened with mode 'w+').
 contigpool: contigs to write.

 The file is closed by the with-statement even when a write fails.
 """
 ##lc: line constrain
 lc=60
 def showdirection(d):
  if d:
   return 'T'
  else:
   return 'F'
 with open(infile,'w+') as handle:
  cl=len(contigpool)
  if cl ==0:
   handle.write('\r\n')
   return
  for i in range(cl):
   contigpool[i].id=i+1
  for ci in contigpool:
   s=ci.sequence
   ls=len(s)
   printtemp= '>id '+str(ci.id)+' length '+str(ls)+' coverage '+str(int(math.ceil(ci.weight)))
   pre=ci.pre
   predirection=ci.predirection
   printtemp+=' p'
   for j in range(len(pre)):
    printtemp=printtemp+' '+str(pre[j].id)+str(showdirection(predirection[j]))
   nextlinks=ci.next
   nextdirection=ci.nextdirection
   printtemp+=' n'
   for j in range(len(nextlinks)):
    printtemp=printtemp+' '+str(nextlinks[j].id)+str(showdirection(nextdirection[j]))
   printtemp+='\r\n'
   handle.write(printtemp)
   if ls<=lc:
    handle.write(str(s))
    handle.write('\r\n')
   else:
    # cut the sequence into consecutive pieces of at most lc characters
    sl=[s[start:start+lc] for start in range(0,ls,lc)]
    handle.write(str('\r\n'.join(sl)))
    handle.write('\r\n')
#  print(contigpool[i].reverse)
#  print(contigpool[i].sequence[-k_mer:])
# for i in range(cl):
#  print(contigpool[i].id,':')
#  for j in range(len(contigpool[i].sequence)):
#   print(contigpool[i].sequence[j],contigpool[i].list[j])

def stacontig(contigpool):
 contiglen=[len(contigpool[i].sequence) for i in range(len(contigpool))]
 kmm=k_mer -1
 totalnode=sum([temp-kmm for temp in contiglen ])
 totalweight=sum([contigtemp.sum for contigtemp in contigpool])
 if totalnode ==0:
  averageweight =0
  meanlength=0
 else:
  averageweight=totalweight/totalnode
  meanlength= sum(contiglen)/len(contiglen)
 print 'nodes:', totalnode
# print 'total weight =', totalweight
# print 'average weight =', averageweight
 stanumber(contiglen)
# print 'number of linked contig :', len([temp for temp in contigpool if len(temp.pre) or len(temp.next) ])
 return (meanlength,averageweight)

def stapath(contigpool):
 contiglen=[len(contigpool[i].sequence) for i in range(len(contigpool))]
 kmm=k_mer -1
 totalnode=sum([temp-kmm for temp in contiglen ])
 print 'nodes:', totalnode
 print 'paths:', len(contigpool)

def stasequence(contigpool):
 """Print length statistics (via stanumber) for a list of sequences."""
 lengths=[len(entry) for entry in contigpool]
 stanumber(lengths)

def stanumber(conlen):
 cl=len(conlen)
 if cl ==0:
  print 'no contig'
  return
 sumconlen=sum(conlen)
 maxconlen=max(conlen)
 minconlen=min(conlen)
 meanconlen=sumconlen/cl
 print 'total length:', sumconlen
 print 'contigs:', cl
 print 'max length:', maxconlen
# print 'minimum length:', minconlen
 print 'average length', meanconlen
 conlen.sort()
 medianconlen=conlen[int(math.ceil(cl/2))]
# print 'median length:', medianconlen
 halfconlen=sumconlen/2
 sumconlen=0
 for i in range(cl):
  sumconlen=sumconlen+conlen[i]
  if sumconlen> halfconlen:
   n50=conlen[i]
   break
 print 'n50:', n50

def getpool( sequence, dg):
 """Look up the contigs whose start matches sequence.

 The first i_mer characters of sequence are used as the key into dg, whose
 values are lists of seed objects (each with .contig and .direction).  A
 seed matches when the rest of sequence equals the same-length stretch
 after position i_mer of the contig's sequence (direction true) or of its
 reverse (direction false).

 Returns (contigs, directions) as two parallel lists; both are empty when
 the key is not in dg.
 """
 findpool=[]
 finddirection=[]
 seed=sequence[:i_mer]
 if seed in dg:
  tail=sequence[i_mer:]
  stop=i_mer+len(tail)
  for candidate in dg[seed]:
   owner=candidate.contig
   if candidate.direction:
    strand=owner.sequence
   else:
    strand=owner.reverse
   if tail == strand[i_mer:stop]:
    findpool.append(owner)
    finddirection.append( candidate.direction)
 return (findpool,finddirection)

def spreprocessingcontig(nodepool):
 """Build the initial contigs from k-mer nodes using seed buckets.

 Steps performed here:
  1. every node is replaced by the pieces returned by its splitnode();
  2. nodes are counted per inner seed (sequence[1:k_mer-1]); a node whose
     seed is not yet known has its sequence replaced by getreverse() and is
     counted under the seed of that reversed sequence;
  3. fdg() turns the counts into (dg, dga) and the nodes are filed into dga;
     as used below, dg[seed] is an index into dga, dga[index] is advanced
     past each filed node, and the nodes of a bucket end up in
     dga[index+1:dga[index]]  (fdg is defined elsewhere - TODO confirm its
     exact layout);
  4. nodes in buckets holding two or more nodes are flagged not fresh and
     set aside in bpool;
  5. every node still fresh seeds a chain that is extended to the right and
     then to the left while a fresh neighbour is found.

 Returns a list of contig objects: contig(nodes, sequence) per chain,
 followed by contig(node) for every node in bpool.
 """
 print 'split node:', time.time()
 pool=[]
 for nodei in nodepool:
  pool.extend(nodei.splitnode())
 nodepool=pool
 print 'initial contig:', time.time()
 dg={}
 kmm=k_mer-1
 kmt=kmm-1
 # count nodes per inner seed, flipping a node when its seed is new
 for nodei in nodepool:
#  nodei.reducekmer()
  nodeseed=nodei.sequence[1:kmm]
  if nodeseed in dg:
   dg[nodeseed]+=1
  else:
   nodei.sequence=nodei.getreverse()
   nodeseed=nodei.sequence[1:kmm]
   if nodeseed in dg:
    dg[nodeseed]+=1
   else:
    dg[nodeseed] =1
 (dg,dga)=fdg(dg)
 # file every node into the bucket of its seed
 for nodei in nodepool:
  nodeseed=nodei.sequence[1:kmm]
  dgs=dg[nodeseed]
  dga[dga[dgs]]=nodei
  dga[dgs]+=1
# canseed= [ seed for seed in dgk if dga[seed] - seed >2]
# canpool=[]
# [canpool.extend(dga[seed+1:dga[seed]] for seed in canseed]
# tempcan =[(seed.sequence,seed.getreverse()) for seed in canseed ]
# canseed=[]
# [canseed.extend((seed[0][2:],seed[0][:kmt],seed[1][2:],seed[1][:kmt])) for seed in tempcan ]
# canseed=set(canseed) & set(dgk)
# [canpool.extend(dga[seed+1:dga[seed]] for seed in canseed]
# for nodei in canpool:
 bpool =[]
 dgv= dg.values()
# dgk= dg.keys()
 # buckets with at least two nodes: their nodes are not chained
 for seed in dgv:
  if dga[seed] - seed >2:
   for nodei in dga[seed+1: dga[seed]]:
    nodei.fresh= False
    bpool.append(nodei)
 # apool collects alternating entries: node list, then chain sequence
 apool=[]
 for nodei in nodepool:
  if not nodei.fresh:
   continue
  nodei.fresh=False
  temps=nodei.sequence
  tempc=[nodei]
  # remember the starting node for the leftward pass
  nodeinodei=nodei
  # extend to the right, one character per accepted node
  while True:
   nodeseed=dg.get(nodei.sequence[2:])
   if nodeseed is not None:
    dgl=dga[nodeseed]
    check = nodei.sequence[1]
    if dgl> nodeseed+2:
     for k in range(nodeseed+1,dgl):
      if dga[k].sequence[0] == check:
       nodei= dga[k]
       break
    elif dga[nodeseed+1].sequence[0] == check:
     nodei= dga[nodeseed+1]
    else:
     break
    # a node that is no longer fresh (including an unchanged nodei) ends the chain
    if nodei.fresh:
     nodei.fresh = False
     temps += nodei.sequence[kmm]
     tempc.append(nodei)
    else:
     break
   else:
    # no bucket for the forward seed: try the reverse strand
    rs=nodei.getreverse()
    nodeseed=dg.get(rs[:kmt])
    if nodeseed is not None:
     dgl=dga[nodeseed]
     check = rs[kmt]
     if dgl> nodeseed+2:
      for k in range(nodeseed+1,dgl):
       if dga[k].sequence[kmm] == check:
        nodei=dga[k]
        break
     elif dga[nodeseed+1].sequence[kmm] == check:
      nodei=dga[nodeseed+1]
     else:
      break
     if nodei.fresh:
      nodei.fresh = False
      # store the node in the orientation of the chain
      nodei.sequence=nodei.getreverse()
      temps += nodei.sequence[kmm]
      tempc.append(nodei)
     else:
      break
    else:
     break
  nodei=nodeinodei
  # extend to the left from the starting node
  while True:
   nodeseed=dg.get(nodei.sequence[:kmm])
   if nodeseed is not None:
    dgl=dga[nodeseed]
    check = nodei.sequence[kmt]
    if dgl> nodeseed+2:
     for k in range(nodeseed+1,dgl):
      if dga[k].sequence[kmm] == check:
       nodei= dga[k]
       break
    elif dga[nodeseed+1].sequence[kmm] == check:
     nodei= dga[nodeseed+1]
    else:
     break
    if nodei.fresh:
     nodei.fresh = False
     temps = nodei.sequence[0]+temps
     tempc.insert(0,nodei)
    else:
     break
   else:
    rs=nodei.getreverse()
    nodeseed=dg.get(rs[2:])
    if nodeseed is not None:
     dgl=dga[nodeseed]
     check = rs[1]
     if dgl> nodeseed+2:
      for k in range(nodeseed+1,dgl):
       if dga[k].sequence[0] == check:
        nodei=dga[k]
        break
     elif dga[nodeseed+1].sequence[0] == check:
      nodei=dga[nodeseed+1]
     else:
      break
     if nodei.fresh:
      nodei.fresh = False
      nodei.sequence=nodei.getreverse()
      temps = nodei.sequence[0]+temps
      tempc.insert(0,nodei)
     else:
      break
    else:
     break
  apool.append(tempc)
  apool.append(temps)
 del dg
 del dga
 # pair up (node list, sequence) entries into contig objects
 apool=[contig(apool[i],apool[i+1]) for i in range(0,len(apool),2)]
 apool.extend([contig(bpooli) for bpooli in bpool ])
 return apool

def tpreprocessingcontig(nodepool):
 """Build the initial contigs by following read order between nodes.

 Steps performed here:
  1. nodes whose inner seed (sequence[1:k_mer-1]), or the inner seed of
     their reverse, was already seen are flagged not fresh and moved to
     bpool together with the node first seen for that seed;
  2. for each remaining node a k-mer that is not at either end of its read
     is looked up; the nodes of the read's previous and next k-mer are
     appended to the node's kmerlist (previous first, next last).  Nodes
     without such a k-mer get fresh=2 and go to bpool;
  3. every node with fresh == 1 seeds a chain that is extended through the
     two appended neighbours while they are still fresh == 1 and their
     sequence (or its reverse) overlaps the chain by k_mer-1 characters.
     Neighbours that cannot be attached get fresh=2 and go to bpool.

 nodepool is modified in place (non-chainable nodes are popped from it).

 Returns a list of contig objects: contig(nodes, sequence) per chain,
 followed by contig(node) for every node in bpool.
 """
 apool=nodepool
 print 'initial contig:', time.time()
 dg={}
 bpool=[]
 kmm=k_mer-1
 kmt=kmm-1
 # step 1: detect nodes sharing an inner seed in either orientation
 for nodei in apool:
#  nodei.reducekmer()
  nodeseed=nodei.sequence[1:kmm]
  dgnode=dg.get(nodeseed)
  if dgnode is not None:
   if dgnode.fresh:
    dgnode.fresh=False
    bpool.append(dgnode)
   nodei.fresh=False
   bpool.append(nodei)
  else:
   dgnode=dg.get(nodei.getreverse()[1:kmm])
   if dgnode is not None:
    if dgnode.fresh:
     dgnode.fresh=False
     bpool.append(dgnode)
    nodei.fresh=False
    bpool.append(nodei)
   else:
    dg[nodeseed] =nodei
 # step 2: iterate backwards because entries are popped from apool
 for i in range(len(apool)-1,-1,-1):
  tempnode=apool[i]
  if not tempnode.fresh:
   apool.pop(i)
   continue
  tempkmer=tempnode.kmerlist[0]
  # look for a k-mer that has both a predecessor and a successor in its read
  if tempkmer.index ==0 or tempkmer.index == (tempkmer.read.kl -1):
   tempkmer=False
   for j in range(1,len(tempnode.kmerlist)):
    temp=tempnode.kmerlist[j]
    if temp.index !=0 and temp.index != (temp.read.kl-1):
     tempkmer=temp
     break
  if tempkmer:
   tkl= tempkmer.read.kmerlist
   tn1=tkl[ tempkmer.index+1].node
   tn2=tkl[ tempkmer.index-1].node
#   if tn1.fresh >0 and tn2.fresh >0:
   # the two neighbour nodes are stacked on kmerlist and popped again in step 3
   tempnode.kmerlist.append( tn2)
   tempnode.kmerlist.append( tn1)
   continue
  tempnode.fresh=2
#  tempnode.direction=False
  bpool.append(apool.pop(i))
 print 'cl a', len(apool), 'cl b', len(bpool), 'cl ', len(apool)+len(bpool)
 # contigpool collects alternating entries: node list, then chain sequence
 contigpool=[]
 kmm=k_mer -1
 notideal=0
 for nodetemp in apool:
  if nodetemp.fresh != 1:
   continue
  nodetemp.fresh=0
  contigtemp=[nodetemp]
  nodesequence=nodetemp.sequence[:]
  # ntn: neighbour taken from the read's next k-mer, ntp: from the previous one
  ntn=nodetemp.kmerlist.pop()
  ntp=nodetemp.kmerlist.pop()
  temptemp=nodetemp
  # extend to the right
  while ntn.fresh ==1:
   nt1=ntn.kmerlist.pop()
   nt2=ntn.kmerlist.pop()
   # one of the neighbour's two links must point back at the current node;
   # the other one is the node to visit next
   if nt2 is nodetemp:
    if nodesequence[-kmm:] == ntn.sequence[:kmm]:
     nodesequence += ntn.sequence[kmm]
    else:
     rsequence=ntn.getreverse()
     if nodesequence[-kmm:] == rsequence[:kmm]:
      nodesequence += rsequence[kmm]
     else:
      ntn.fresh=2
      bpool.append(ntn)
      notideal+=1
      break
    ntn.fresh=0
    contigtemp.append(ntn)
    nodetemp=ntn
    ntn=nt1
   elif nt1 is nodetemp:
    if nodesequence[-kmm:] == ntn.sequence[:kmm]:
     nodesequence += ntn.sequence[kmm]
    else:
     rsequence=ntn.getreverse()
     if nodesequence[-kmm:] == rsequence[:kmm]:
      nodesequence += rsequence[kmm]
     else:
      ntn.fresh=2
      bpool.append(ntn)
      notideal+=1
      break
    ntn.fresh=0
    contigtemp.append(ntn)
    nodetemp=ntn
    ntn=nt2
   else:
    # neither link points back: the neighbour cannot be chained
    ntn.fresh=2
    bpool.append(ntn)
    notideal+=1
    break
#    print 'error at extendcontig'
#    sys.exit(0)
#  del ntn.fresh
#  if ntn.direction == 1:
#   print 'error at extendcontig'
#   sys.exit(0)
  nodetemp=temptemp
  # extend to the left from the starting node
  while ntp.fresh ==1:
   nt1=ntp.kmerlist.pop()
   nt2=ntp.kmerlist.pop()
   if nt1 is nodetemp:
    if nodesequence[:kmm] == ntp.sequence[1:]:
     nodesequence = ntp.sequence[0] + nodesequence
    else:
     rsequence=ntp.getreverse()
     if nodesequence[:kmm] == rsequence[1:]:
      nodesequence = rsequence[0] + nodesequence
     else:
      ntp.fresh=2
      bpool.append(ntp)
      notideal+=1
      break
    ntp.fresh=0
    contigtemp.insert(0,ntp)
    nodetemp=ntp
    ntp=nt2
   elif nt2 is nodetemp:
    if nodesequence[:kmm] == ntp.sequence[1:]:
     nodesequence = ntp.sequence[0] + nodesequence
    else:
     rsequence=ntp.getreverse()
     if nodesequence[:kmm] == rsequence[1:]:
      nodesequence = rsequence[0] + nodesequence
     else:
      ntp.fresh=2
      bpool.append(ntp)
      notideal+=1
      break
    ntp.fresh=0
    contigtemp.insert(0,ntp)
    nodetemp=ntp
    ntp=nt1
   else:
    ntp.fresh=2
    bpool.append(ntp)
    notideal+=1
    break
#    print 'error at extendcontig'
#    sys.exit(0)
#  del ntp.fresh
#  if ntp.direction == 1:
#   print 'error at extendcontig direction check'
#   sys.exit(0)
# for i in range(len(apool)):
#  del apool[i].fresh
  contigpool.append(contigtemp)
  contigpool.append(nodesequence)
 if notideal>0:
  print 'notideal=',notideal
 # pair up (node list, sequence) entries into contig objects
 contigpool=[contig(contigpool[i],contigpool[i+1]) for i in range(0,len(contigpool),2)]
 contigpool.extend([contig(bpool[i]) for i in range(len(bpool)) ])
 return contigpool

def cpreprocessingcontig(nodepool):
 """Build the initial contigs: seed buckets find branches, reads give order.

 Steps performed here:
  1. every node is replaced by the pieces returned by its splitnode();
  2. nodes are counted per inner seed (sequence[1:k_mer-1]) and filed into
     the dga buckets produced by fdg(), exactly as in spreprocessingcontig
     (fdg is defined elsewhere - TODO confirm its exact layout);
  3. a node is flagged not fresh and moved to bpool when, on its right or
     its left side, more than one node of the matching bucket fits;
  4. for each node still having fresh == 1 the nodes of the previous and
     next k-mer of one of its reads are appended to its kmerlist; nodes
     that only have k-mers at read ends get fresh=2 and go to bpool;
  5. every node with fresh == 1 seeds a chain that is extended through
     those neighbours while they overlap the chain by k_mer-1 characters.

 Returns a list of contig objects: contig(nodes, sequence) per chain,
 followed by contig(node) for every node in bpool.
 """
 print 'split node:', time.time()
 pool=[]
 for nodei in nodepool:
  pool.extend(nodei.splitnode())
 nodepool=pool
 print 'initial contig:', time.time()
 dg={}
 kmm=k_mer-1
 kmt=kmm-1
 # count nodes per inner seed, flipping a node when its seed is new
 for nodei in nodepool:
#  nodei.reducekmer()
  nodeseed=nodei.sequence[1:kmm]
  if nodeseed in dg:
   dg[nodeseed]+=1
  else:
   nodei.sequence=nodei.getreverse()
   nodeseed=nodei.sequence[1:kmm]
   if nodeseed in dg:
    dg[nodeseed]+=1
   else:
    dg[nodeseed] =1
 (dg,dga)=fdg(dg)
 # file every node into the bucket of its seed
 for nodei in nodepool:
  nodeseed=nodei.sequence[1:kmm]
  dgs=dg[nodeseed]
  dga[dga[dgs]]=nodei
  dga[dgs]+=1
# canseed= [ seed for seed in dgk if dga[seed] - seed >2]
# canpool=[]
# [canpool.extend(dga[seed+1:dga[seed]] for seed in canseed]
# tempcan =[(seed.sequence,seed.getreverse()) for seed in canseed ]
# canseed=[]
# [canseed.extend((seed[0][2:],seed[0][:kmt],seed[1][2:],seed[1][:kmt])) for seed in tempcan ]
# canseed=set(canseed) & set(dgk)
# [canpool.extend(dga[seed+1:dga[seed]] for seed in canseed]
# for nodei in canpool:
 bpool =[]
 # step 3: brench counts the bucket members that fit next to the node
 for nodei in nodepool:
  # right side, forward strand first, reverse strand if no bucket exists
  nodeseed=dg.get(nodei.sequence[2:])
  if nodeseed is not None:
   dgl=dga[nodeseed]
   if dgl> nodeseed+2:
    check = nodei.sequence[1]
    brench =0
    for k in range(nodeseed+1,dgl):
     if dga[k].sequence[0] == check:
      brench+=1
    if brench >1:
     nodei.fresh = False
     bpool.append(nodei)
     continue
  else:
   rs=nodei.getreverse()
   nodeseed=dg.get(rs[:kmt])
   if nodeseed is not None:
    dgl=dga[nodeseed]
    if dgl> nodeseed+2:
     check = rs[kmt]
     brench =0
     for k in range(nodeseed+1,dgl):
      if dga[k].sequence[kmm] == check:
       brench+=1
     if brench >1:
      nodei.fresh = False
      bpool.append(nodei)
      continue
  # left side, same scheme
  nodeseed=dg.get(nodei.sequence[:kmm])
  if nodeseed is not None:
   dgl=dga[nodeseed]
   if dgl> nodeseed+2:
    check = nodei.sequence[kmt]
    brench =0
    for k in range(nodeseed+1,dgl):
     if dga[k].sequence[kmm] == check:
      brench+=1
    if brench >1:
     nodei.fresh = False
     bpool.append(nodei)
     continue
  else:
   rs=nodei.getreverse()
   nodeseed=dg.get(rs[2:])
   if nodeseed is not None:
    dgl=dga[nodeseed]
    if dgl> nodeseed+2:
     check = rs[1]
     brench =0

     for k in range(nodeseed+1,dgl):
      if dga[k].sequence[0] == check:
       brench+=1
     if brench >1:
      nodei.fresh = False
      bpool.append(nodei)
      continue
 apool=[ nodei for nodei in nodepool if nodei.fresh == 1]
 print 'cl a', len(apool), 'cl b', len(bpool), 'cl ', len(apool)+len(bpool)
 # step 4: iterate backwards because entries are popped from apool
 for i in range(len(apool)-1,-1,-1):
  tempnode=apool[i]
  tempkmer=tempnode.kmerlist[0]
  # look for a k-mer that has both a predecessor and a successor in its read
  if tempkmer.index ==0 or tempkmer.index == (tempkmer.read.kl -1):
   tempkmer=False
   for j in range(1,len(tempnode.kmerlist)):
    temp=tempnode.kmerlist[j]
    if temp.index !=0 and temp.index != (temp.read.kl-1):
     tempkmer=temp
     break
  if tempkmer:
   tkl= tempkmer.read.kmerlist
   tn1=tkl[ tempkmer.index+1].node
   tn2=tkl[ tempkmer.index-1].node
#   if tn1.fresh >0 and tn2.fresh >0:
   # the two neighbour nodes are stacked on kmerlist and popped again in step 5
   tempnode.kmerlist.append( tn2)
   tempnode.kmerlist.append( tn1)
   continue
  tempnode.fresh=2
#  tempnode.direction=False
  bpool.append(apool.pop(i))
 # contigpool collects alternating entries: node list, then chain sequence
 contigpool=[]
 kmm=k_mer -1
 notideal=0
 for nodetemp in apool:
  if nodetemp.fresh != 1:
   continue
  nodetemp.fresh=0
  contigtemp=[nodetemp]
  nodesequence=nodetemp.sequence[:]
  # ntn: neighbour taken from the read's next k-mer, ntp: from the previous one
  ntn=nodetemp.kmerlist.pop()
  ntp=nodetemp.kmerlist.pop()
  temptemp=nodetemp
  # extend to the right
  while ntn.fresh ==1:
   nt1=ntn.kmerlist.pop()
   nt2=ntn.kmerlist.pop()
   # one of the neighbour's two links must point back at the current node;
   # the other one is the node to visit next
   if nt2 is nodetemp:
    if nodesequence[-kmm:] == ntn.sequence[:kmm]:
     nodesequence += ntn.sequence[kmm]
    else:
     rsequence=ntn.getreverse()
     if nodesequence[-kmm:] == rsequence[:kmm]:
      nodesequence += rsequence[kmm]
     else:
      ntn.fresh=2
      bpool.append(ntn)
      notideal+=1
      break
    ntn.fresh=0
    contigtemp.append(ntn)
    nodetemp=ntn
    ntn=nt1
   elif nt1 is nodetemp:
    if nodesequence[-kmm:] == ntn.sequence[:kmm]:
     nodesequence += ntn.sequence[kmm]
    else:
     rsequence=ntn.getreverse()
     if nodesequence[-kmm:] == rsequence[:kmm]:
      nodesequence += rsequence[kmm]
     else:
      ntn.fresh=2
      bpool.append(ntn)
      notideal+=1
      break
    ntn.fresh=0
    contigtemp.append(ntn)
    nodetemp=ntn
    ntn=nt2
   else:
    # neither link points back: the neighbour cannot be chained
    ntn.fresh=2
    bpool.append(ntn)
    notideal+=1
    break
#    print 'error at extendcontig'
#    sys.exit(0)
#  del ntn.fresh
#  if ntn.direction == 1:
#   print 'error at extendcontig'
#   sys.exit(0)
  nodetemp=temptemp
  # extend to the left from the starting node
  while ntp.fresh ==1:
   nt1=ntp.kmerlist.pop()
   nt2=ntp.kmerlist.pop()
   if nt1 is nodetemp:
    if nodesequence[:kmm] == ntp.sequence[1:]:
     nodesequence = ntp.sequence[0] + nodesequence
    else:
     rsequence=ntp.getreverse()
     if nodesequence[:kmm] == rsequence[1:]:
      nodesequence = rsequence[0] + nodesequence
     else:
      ntp.fresh=2
      bpool.append(ntp)
      notideal+=1
      break
    ntp.fresh=0
    contigtemp.insert(0,ntp)
    nodetemp=ntp
    ntp=nt2
   elif nt2 is nodetemp:
    if nodesequence[:kmm] == ntp.sequence[1:]:
     nodesequence = ntp.sequence[0] + nodesequence
    else:
     rsequence=ntp.getreverse()
     if nodesequence[:kmm] == rsequence[1:]:
      nodesequence = rsequence[0] + nodesequence
     else:
      ntp.fresh=2
      bpool.append(ntp)
      notideal+=1
      break
    ntp.fresh=0
    contigtemp.insert(0,ntp)
    nodetemp=ntp
    ntp=nt1
   else:
    ntp.fresh=2
    bpool.append(ntp)
    notideal+=1
    break
#    print 'error at extendcontig'
#    sys.exit(0)
#  del ntp.fresh
#  if ntp.direction == 1:
#   print 'error at extendcontig direction check'
#   sys.exit(0)
# for i in range(len(apool)):
#  del apool[i].fresh
  contigpool.append(contigtemp)
  contigpool.append(nodesequence)
 if notideal>0:
  print 'notideal=',notideal
 # pair up (node list, sequence) entries into contig objects
 contigpool=[contig(contigpool[i],contigpool[i+1]) for i in range(0,len(contigpool),2)]
 contigpool.extend([contig(bpool[i]) for i in range(len(bpool)) ])
 return contigpool

def npreprocessingcontig(nodepool):
 apool=[]
 bpool=[]
 for i in range(len(nodepool)):
  temp=nodepool[i].splitnode()
  if len(temp) >1:
   for j in range(len(temp)):
#    temp[j].direction=False
    temp[j].fresh=0
   bpool.extend(temp)
  else:
   apool.append(temp[0])
 print 'cl a', len(apool), 'cl b', len(bpool), 'cl ', len(apool)+len(bpool)
 for i in range(len(apool)-1,-1,-1):
  tempnode=apool[i]
  tempkmer=tempnode.kmerlist[0]
  if tempkmer.index ==0 or tempkmer.index == (tempkmer.read.kl -1):
   tempkmer=False
   for j in range(1,len(tempnode.kmerlist)):
    temp=tempnode.kmerlist[j]
    if temp.index !=0 and temp.index != (temp.read.kl-1):
     tempkmer=temp
     break
  if tempkmer:
   tkl= tempkmer.read.kmerlist
   tn1=tkl[ tempkmer.index+1].node
   tn2=tkl[ tempkmer.index-1].node
#   if tn1.fresh >0 and tn2.fresh >0:
   tempnode.kmerlist.append( tn2)
   tempnode.kmerlist.append( tn1)
   continue
  tempnode.fresh=2
#  tempnode.direction=False
  bpool.append(apool.pop(i))
 contigpool=[]
 kmm=k_mer -1
 notideal=0
 for i in range(len(apool)):
  nodetemp=apool[i]
  if nodetemp.fresh != 1:
   continue
  nodetemp.fresh=0
  contigtemp=[nodetemp]
  nodesequence=nodetemp.sequence[:]
  ntn=nodetemp.kmerlist.pop()
  ntp=nodetemp.kmerlist.pop()
  temptemp=nodetemp
  while ntn.fresh ==1:
   nt1=ntn.kmerlist.pop()
   nt2=ntn.kmerlist.pop()
   if nt2 is nodetemp:
    if nodesequence[-kmm:] == ntn.sequence[:kmm]:
     nodesequence += ntn.sequence[kmm]
    else:
     rsequence=ntn.getreverse()
     if nodesequence[-kmm:] == rsequence[:kmm]:
      nodesequence += rsequence[kmm]
     else:
      ntn.fresh=2
      bpool.append(ntn)
      notideal+=1
      break
    ntn.fresh=0
    contigtemp.append(ntn)
    nodetemp=ntn
    ntn=nt1
   elif nt1 is nodetemp:
    if nodesequence[-kmm:] == ntn.sequence[:kmm]:
     nodesequence += ntn.sequence[kmm]
    else:
     rsequence=ntn.getreverse()
     if nodesequence[-kmm:] == rsequence[:kmm]:
      nodesequence += rsequence[kmm]
     else:
      ntn.fresh=2
      bpool.append(ntn)
      notideal+=1
      break
    ntn.fresh=0
    contigtemp.append(ntn)
    nodetemp=ntn
    ntn=nt2
   else:
    ntn.fresh=2
    bpool.append(ntn)
    notideal+=1
    break
#    print 'error at extendcontig'
#    sys.exit(0)
#  del ntn.fresh
#  if ntn.direction == 1:
#   print 'error at extendcontig'
#   sys.exit(0)
  nodetemp=temptemp
  while ntp.fresh ==1:
   nt1=ntp.kmerlist.pop()
   nt2=ntp.kmerlist.pop()
   if nt1 is nodetemp:
    if nodesequence[:kmm] == ntp.sequence[1:]:
     nodesequence = ntp.sequence[0] + nodesequence
    else:
     rsequence=ntp.getreverse()
     if nodesequence[:kmm] == rsequence[1:]:
      nodesequence = rsequence[0] + nodesequence
     else:
      ntp.fresh=2
      bpool.append(ntp)
      notideal+=1
      break
    ntp.fresh=0
    contigtemp.insert(0,ntp)
    nodetemp=ntp
    ntp=nt2
   elif nt2 is nodetemp:
    if nodesequence[:kmm] == ntp.sequence[1:]:
     nodesequence = ntp.sequence[0] + nodesequence
    else:
     rsequence=ntp.getreverse()
     if nodesequence[:kmm] == rsequence[1:]:
      nodesequence = rsequence[0] + nodesequence
     else:
      ntp.fresh=2
      bpool.append(ntp)
      notideal+=1
      break
    ntp.fresh=0
    contigtemp.insert(0,ntp)
    nodetemp=ntp
    ntp=nt1
   else:
    ntp.fresh=2
    bpool.append(ntp)
    notideal+=1
    break
#    print 'error at extendcontig'
#    sys.exit(0)
#  del ntp.fresh
#  if ntp.direction == 1:
#   print 'error at extendcontig direction check'
#   sys.exit(0)
# for i in range(len(apool)):
#  del apool[i].fresh
  contigpool.append(contigtemp)
  contigpool.append(nodesequence)
 if notideal>0:
  print 'notideal=',notideal
 contigpool=[contig(contigpool[i],contigpool[i+1]) for i in range(0,len(contigpool),2)]
 contigpool.extend([contig(bpool[i]) for i in range(len(bpool)) ])
 return contigpool

def npreprocessingcontigtrans(nodepool):
 apool=[]
 bpool=[]
 for i in range(len(nodepool)):
  temp=nodepool[i].splitnode()
  if len(temp) >1:
   for j in range(len(temp)):
#    temp[j].direction=False
    temp[j].fresh=0
   bpool.extend(temp)
  else:
   apool.append(temp[0])
 print 'cl a', len(apool), 'cl b', len(bpool), 'cl ', len(apool)+len(bpool)
 for i in range(len(apool)-1,-1,-1):
  tempnode=apool[i]
  tempkmer=tempnode.kmerlist[0]
  if tempkmer.index ==0 or tempkmer.index == (tempkmer.read.kl -1):
   tempkmer=False
   for j in range(1,len(tempnode.kmerlist)):
    temp=tempnode.kmerlist[j]
    if temp.index !=0 and temp.index != (temp.read.kl-1):
     tempkmer=temp
     break
  if tempkmer:
   tkl= tempkmer.read.kmerlist
   tn1=tkl[ tempkmer.index+1].node
   tn2=tkl[ tempkmer.index-1].node
#   if tn1.fresh >0 and tn2.fresh >0:
   tempnode.kmerlist.append( tn2)
   tempnode.kmerlist.append( tn1)
   continue
  tempnode.fresh=2
#  tempnode.direction=False
  bpool.append(apool.pop(i))
 contigpool=[]
 kmm=k_mer -1
 notideal=0
 for i in range(len(apool)):
  nodetemp=apool[i]
  if nodetemp.fresh != 1:
   continue
  nodetemp.fresh=0
  contigtemp=[nodetemp]
  nodesequence=nodetemp.sequence[:]
  ntn=nodetemp.kmerlist.pop()
  ntp=nodetemp.kmerlist.pop()
  temptemp=nodetemp
  while ntn.fresh ==1:
   nt1=ntn.kmerlist.pop()
   nt2=ntn.kmerlist.pop()
   if nt2 is nodetemp:
    if nodesequence[-kmm:] == ntn.sequence[:kmm]:
     nodesequence += ntn.sequence[kmm]
    else:
     ntn.fresh=2
     bpool.append(ntn)
     notideal+=1
     break
    ntn.fresh=0
    contigtemp.append(ntn)
    nodetemp=ntn
    ntn=nt1
   else:
    ntn.fresh=2
    bpool.append(ntn)
    notideal+=1
    break
#    print 'error at extendcontig'
#    sys.exit(0)
#  del ntn.fresh
#  if ntn.direction == 1:
#   print 'error at extendcontig'
#   sys.exit(0)
  nodetemp=temptemp
  while ntp.fresh ==1:
   nt1=ntp.kmerlist.pop()
   nt2=ntp.kmerlist.pop()
   if nt1 is nodetemp:
    if nodesequence[:kmm] == ntp.sequence[1:]:
     nodesequence = ntp.sequence[0] + nodesequence
    else:
     ntp.fresh=2
     bpool.append(ntp)
     notideal+=1
     break
    ntp.fresh=0
    contigtemp.insert(0,ntp)
    nodetemp=ntp
    ntp=nt2
   else:
    ntp.fresh=2
    bpool.append(ntp)
    notideal+=1
    break
#    print 'error at extendcontig'
#    sys.exit(0)
#  del ntp.fresh
#  if ntp.direction == 1:
#   print 'error at extendcontig direction check'
#   sys.exit(0)
# for i in range(len(apool)):
#  del apool[i].fresh
  contigpool.append(contigtemp)
  contigpool.append(nodesequence)
 if notideal>0:
  print 'notideal=',notideal
 contigpool=[contig(contigpool[i],contigpool[i+1]) for i in range(0,len(contigpool),2)]
 contigpool.extend([contig(bpool[i]) for i in range(len(bpool)) ])
 return contigpool

def preprocessingcontig(nodepool):
 apool=[]
 bpool=[]
 for i in range(len(nodepool)):
  temp=nodepool[i].splitnode()
  if len(temp) >1:
   for j in range(len(temp)):
#    temp[j].direction=False
    temp[j].fresh=0
   bpool.extend(temp)
  else:
   apool.append(temp[0])
 print 'cl a', len(apool), 'cl b', len(bpool), 'cl ', len(apool)+len(bpool)
 for i in range(len(apool)-1,-1,-1):
  tempnode=apool[i]
  tempkmer=tempnode.kmerlist[0]
  if tempkmer.index ==0 or tempkmer.index == (tempkmer.read.kl -1):
   tempkmer=False
   for j in range(1,len(tempnode.kmerlist)):
    temp=tempnode.kmerlist[j]
    if temp.index !=0 and temp.index != (temp.read.kl-1):
     tempkmer=temp
     break
  if tempkmer:
   tkl= tempkmer.read.kmerlist
   tn1=tkl[ tempkmer.index+1].node
   tn2=tkl[ tempkmer.index-1].node
   if tn1.fresh >0 and tn2.fresh >0:
    tempnode.kmerlist.append( tn2)
    tempnode.kmerlist.append( tn1)
    continue
  tempnode.fresh=2
#  tempnode.direction=False
  bpool.append(apool.pop(i))
 contigpool=[]
 kmm=k_mer -1
 notideal=0
 for i in range(len(apool)):
  nodetemp=apool[i]
  if nodetemp.fresh != 1:
   continue
  nodetemp.fresh=0
  contigtemp=[nodetemp]
  nodesequence=nodetemp.sequence[:]
  ntn=nodetemp.kmerlist.pop()
  ntp=nodetemp.kmerlist.pop()
  temptemp=nodetemp
  while ntn.fresh ==1:
   nt1=ntn.kmerlist.pop()
   nt2=ntn.kmerlist.pop()
   if nt2 is nodetemp:
    if nodesequence[-kmm:] == ntn.sequence[:kmm]:
     nodesequence += ntn.sequence[kmm]
    else:
     rsequence=ntn.getreverse()
     if nodesequence[-kmm:] == rsequence[:kmm]:
      nodesequence += rsequence[kmm]
     else:
      ntn.fresh=2
      bpool.append(ntn)
      notideal+=1
      break
    ntn.fresh=0
    contigtemp.append(ntn)
    nodetemp=ntn
    ntn=nt1
   elif nt1 is nodetemp:
    if nodesequence[-kmm:] == ntn.sequence[:kmm]:
     nodesequence += ntn.sequence[kmm]
    else:
     rsequence=ntn.getreverse()
     if nodesequence[-kmm:] == rsequence[:kmm]:
      nodesequence += rsequence[kmm]
     else:
      ntn.fresh=2
      bpool.append(ntn)
      notideal+=1
      break
    ntn.fresh=0
    contigtemp.append(ntn)
    nodetemp=ntn
    ntn=nt2
   else:
    ntn.fresh=2
    bpool.append(ntn)
    notideal+=1
    break
#    print 'error at extendcontig'
#    sys.exit(0)
#  del ntn.fresh
#  if ntn.direction == 1:
#   print 'error at extendcontig'
#   sys.exit(0)
  nodetemp=temptemp
  while ntp.fresh ==1:
   nt1=ntp.kmerlist.pop()
   nt2=ntp.kmerlist.pop()
   if nt1 is nodetemp:
    if nodesequence[:kmm] == ntp.sequence[1:]:
     nodesequence = ntp.sequence[0] + nodesequence
    else:
     rsequence=ntp.getreverse()
     if nodesequence[:kmm] == rsequence[1:]:
      nodesequence = rsequence[0] + nodesequence
     else:
      ntp.fresh=2
      bpool.append(ntp)
      notideal+=1
      break
    ntp.fresh=0
    contigtemp.insert(0,ntp)
    nodetemp=ntp
    ntp=nt2
   elif nt2 is nodetemp:
    if nodesequence[:kmm] == ntp.sequence[1:]:
     nodesequence = ntp.sequence[0] + nodesequence
    else:
     rsequence=ntp.getreverse()
     if nodesequence[:kmm] == rsequence[1:]:
      nodesequence = rsequence[0] + nodesequence
     else:
      ntp.fresh=2
      bpool.append(ntp)
      notideal+=1
      break
    ntp.fresh=0
    contigtemp.insert(0,ntp)
    nodetemp=ntp
    ntp=nt1
   else:
    ntp.fresh=2
    bpool.append(ntp)
    notideal+=1
    break
#    print 'error at extendcontig'
#    sys.exit(0)
#  del ntp.fresh
#  if ntp.direction == 1:
#   print 'error at extendcontig direction check'
#   sys.exit(0)
# for i in range(len(apool)):
#  del apool[i].fresh
  contigpool.append(contigtemp)
  contigpool.append(nodesequence)
 if notideal>0:
  print 'notideal=',notideal
 contigpool=[contig(contigpool[i],contigpool[i+1]) for i in range(0,len(contigpool),2)]
 contigpool.extend([contig(bpool[i]) for i in range(len(bpool)) ])
 return contigpool
#def nodepath( nodelist):
# sequence=nodelist[0].sequence[:]
# overlap=k_mer-1
# for i in range(1,len(nodelist)):
#  s=sequence[-overlap:]
#  nli=nodelist[i]
#  if s == nli.sequence[:overlap]:
#   sequence+= nli.sequence[overlap:]
#  else:
#   rev=nli.getreverse()
#   if s==rev[:overlap]:
#    sequence+=rev[overlap:]
#   else:
#    print 'error at extendcontig sequence'
#    sys.exit(0)
# return sequence
# Script entry: run the pipeline selected by p_number on the two input files.
# A p_number outside 0-6 matches no branch, so nothing is run before cleanup.
if p_number==0:
 denovoassembly( infile1, infile2 )
elif p_number==1:
 denovoassemblytrans( infile1, infile2 )
elif p_number==2:
 selfmate( infile1, infile2 )
elif p_number==3:
 selfmap( infile1, infile2 )
elif p_number==4:
 fdenovoassembly( infile1, infile2 )
elif p_number==5:
 fdenovoassemblytrans( infile1, infile2 )
elif p_number==6:
 findsmrna( infile1, infile2 )

# Cleanup: point sys.stdout at `ss`, close `ff`, reopen the file `ffname` in
# 'w+' mode (which truncates it), close it again and delete it.
# NOTE(review): ss, ff and ffname are assigned earlier in the file; presumably
# ss is the saved original stdout and ff a temporary redirect target - confirm.
sys.stdout=ss
ff.close()
ff=open(ffname,'w+')
ff.close()
#os.remove('systemfilec6'+'-'+str(k_mer)+'-'+str(p_mer)+'-'+str(splitparameter)+'-'+str(idealerrorfraction)+'-'+fixstring(infile1)+'-'+fixstring(infile2)+'-'+str(i_number)+'-'+str(t_number))
os.remove(ffname)
print 'end:'
