"""generate PSF information from sequence or coordinates

Particularly useful are the pdbToPSF and seqToPSF routines.
"""

terminalAtoms = ['H5T','H3T']

# this dictionary will hold all residue names found in the corresponding
# topology file. It is built by deduceSeqType
residueTypes = {}
residueTypes['protein']=[]
residueTypes['nucleic']=[]
residueTypes['water']=[]
residueTypes['metal']=[]

residueMap=dict( G = 'GUA' ,   #single character residue names
		 C = 'CYT',
		 T = 'THY',
		 A = 'ADE',
		 I = 'INO',
		 HOH = 'WAT',
		 )


failIfInsertion=True
class InsertionException(Exception):
    "Exception thrown by pdbToSeq if an insertion code is detected"
    def __init__(s):
        Exception.__init__(s,'Sequence has residue insertion(s).')
        return
    pass

def pdbToSeq(pdbRecord,
             useSeqres=False,
             useChainID=True,
             processBiomt=False,
             failIfInsertion=-1,
             ):
    """return a list of list of sequences.

    If useChainID=True (default), and the chainID field is specified in
    pdbRecord, chainID overrides the segid field for naming segments; otherwise
    segid is used.  
    
    processBiomt specifies whether the REMARK 350 BIOMT record is used. By
    default, Biomolecule 1 is read, specifying a different integer to the
    processBiomt argument will read that entry.

    If failIfInsertion=False, a warning message will be printed
    and the residue will be skipped if the insertion code field of the ATOM
    record is not blank. If failIfInsertion=True, an InsertionException will
    be raised if an iCode is encountered. If the argument is omitted,
    the module-local variable of the same name will be used, and its default
    value is True.
    """
    
    import psfGen
    if failIfInsertion==-1: failIfInsertion=psfGen.failIfInsertion
    seqs = []
    seq=[]
    curSeg=0
    curResid=None
    terminate=0
    seg='    '
    remarks350=[]
    resid=None
    lostRes=''  #resid lost if no TER or terminal atoms
    previousTerminate=0 #true if one termination atom has been seen
    beginResid=None
    lines=pdbRecord.split('\n')
    for line in lines:
        if line.startswith('ATOM'): 
            res = line[17:21]
            try:
                resid = int(line[22:26])
            except ValueError:
                raise Exception("unable to read residue number in entry:\n"+
                                line)
            icode = line[26]
            if icode!=' ':
                print 'pdbToSeq: Warning. Found insertion code in record:'
                print '  ', line
                if failIfInsertion:
                    raise InsertionException
                continue
            if beginResid==None: beginResid=resid
            seg = line[72:76]
            seg += ' '*(4-len(seg)) # this for truncated ATOM records
            chainID=line[21]
            if (useChainID or seg=='' or seg=='    ') and chainID!=' ':
                seg = chainID
                pass
            if curSeg==0:
                curSeg=seg
                #print 'curSeg initiated: ', curSeg, chainID, len(seg)
                pass
            atom = line[12:16].strip()
            if seq:
                if (seg!=curSeg or
                    resid!=curResid and resid!=curResid+1):
                   #print "seq change termination >%s< >%s<" % ( seg, curSeg)
                   terminate=1
                   lostRes=res
                #print resid, curResid
                elif len(seq)>1 and atom in terminalAtoms:
                    #print "found terminal atom", len(seq)
                    if previousTerminate:
                        terminate=1
                        previousTerminate=0
                        lostRes=res
                    else:
                        previousTerminate=1
                        pass
                    pass
                pass
            if not terminate and resid!=None and resid!=curResid:
                seq.append(res)
                pass
            pass
        elif useSeqres and line.startswith('SEQRES'):
            return seqres(lines)
        elif line.startswith('TER'):
            terminate=1
        elif line.startswith('ENDMDL'):
            break
        elif line.startswith('REMARK 350'):
            remarks350.append(line)
            pass
        if terminate and seq:
            seqs.append((beginResid,curSeg,seq))
            previousTerminate=0
            beginResid=None
            seq=[]
            curSeg=0
            if lostRes:
                seq.append(lostRes)
                beginResid=resid
                lostRes=''
                curSeg=seg
                pass
            terminate=0
            curResid=beginResid
        else:
            curResid=resid
            pass
        pass
    if seq: seqs.append( (beginResid,curSeg,seq) )
    class PDBToSeqClass:
        pass
    ret=PDBToSeqClass()
    ret.seqs=seqs
    ret.biomt=None
    if processBiomt:
        ret.biomt=processBiomtEntries(remarks350,
                      biomol=processBiomt if processBiomt!=True else 1)
        pass
    return ret

def seqres(lines):
    """ read the pdb SEQRES field
    return a list of lists of sequences.

    SEQRES fields include chain specifiers. If this is blank the segid
    is set from the first ATOM entry.
    """
    from simulationWorld  import SimulationWorld_world as simWorld
    if simWorld().logLevel()!='none':
        print "using seqres field"
        pass
    records = filter(lambda x:x.startswith('SEQRES'),lines)

    chains={}
    for record in records:
        record = record[:min(len(record),72)]
        id=record[11]
        residues=record[19:].split()
        if chains.has_key(id):
            chains[id] += residues
        else:
            chains[id] = residues
            pass
        pass
    ids=chains.keys()
    ids.sort()
    ret=[]
    for id in ids:
        ret.append( [1,id,chains[id]] )
        pass

    #get segment names, if chainids are not specified
    if ret[0][1]!=' ':
        return ret

    segs=[]
    chainCnt=0
    prevChainID=''
    for line in lines:
        if line.startswith('ATOM'): 
            seg = line[72:76]
            chainID=line[21]
            if prevChainID and chainID!= prevChainID:
                chainCnt+=1
                pass
            prevChainID=chainID
            resid = int(line[23:26])
            if ret[chainCnt][1]==' ':
                ret[chainCnt][1]=seg
                pass
            if chainCnt+1 >= len(ret):
                break

            if resid>=ret[chainCnt+1][0]:
                chainCnt+=1
                pass
            pass
        pass

    return ret
        

def autoProcessPdbSSBonds(pdbRecord):
    """ scan a PDB file for PDB SSBOND header fields.
    call addDisulfideBond as appropriate

    """
    from simulationWorld  import SimulationWorld_world as simWorld

    lines=pdbRecord.split('\n')
    records = filter(lambda x:x.startswith('SSBOND'),lines)

    if len(records) > 0:
        if simWorld().logLevel()!='none':		   
            print "using ssbond field"
            pass
        pass
    
    for record in records:

        """ These character positions are hard-wired into tbe PDB format """
        
        fromChainName = record[15:17].strip()
        fromResNum = record[18:21]
        toChainName = record[29:31].strip()
        toResNum = record[32:35]

        if len(fromChainName) > 0:
            fromSel = "(segid " + fromChainName + " and resid " + fromResNum + ")"
        else:
            fromSel = "(resid " + fromResNum + ")"
            pass

        if len(toChainName) > 0:
            toSel = "(segid " + toChainName + " and resid " + toResNum + ")"
        else:
            toSel = "(resid " + toResNum + ")"
            pass
                
        addDisulfideBond(fromSel, toSel)
        pass
    return

def grabResidueNames(file):
    """
    find all residue names in the given file.
    """
    import protocol
    file = protocol.genTopParFilename(file)
    import re
    ret=[]
    for line in open(file).readlines():
        hit = re.search(r"^ *resi[^ \t]*[ \t]+([^ \t\n!]+)",line,
                        re.IGNORECASE)
        if hit:
            ret.append( hit.group(1) )
            pass
        pass
    return ret


deduceSeqType_first=True
def deduceSeqType(seq):
    """
    given a 3 character residue name, determine whether it's protein or
    nucleic acid. DNA/RNA disambiguation is not always possible. The
    default is RNA.

    To make this determination, residue names are obtain from the current
    values of the protein and nucleic topology files (set in <m protocol>).
    """
    ret='unknown'
    if not seq: return ret

    global deduceSeqType_first
    import protocol
    if deduceSeqType_first:
        deduceSeqType_first=False
        for key in residueTypes.keys():
            residueTypes[key] += grabResidueNames(protocol.topology[key])
	    #add in single character residue names
	    for singleCharResname in residueMap.keys():
	        if residueMap[singleCharResname] in residueTypes[key]:
		    residueTypes[key].append(singleCharResname)
		    pass
	        pass
            #change keys: prot --> protein, dna --> nucleic
            pass
        pass
    
    firstRes=seq[0]
    if len(firstRes)>4:
        raise Exception("invalid residue name: " + firstRes)
    for type in residueTypes.keys():
        if firstRes.strip() in residueTypes[type]: ret=type
        pass
    if ret=='unknown': return ret
    for res in seq:
        if len(res)>4:
            raise Exception("invalid residue name: " + res)
        res = res.strip()
        if not res in residueTypes[ret]:
            raise Exception("residue %s not of type %s in %s" %
                            (res,ret,`seq`))
        pass

    # attempt at DNA disambiguation
    if ret=='nucleic':
        ret = 'dna'
        if 'URI' in seq or 'URA' in seq: ret = 'rna'
        pass
    return ret

    
def renameResidues(seq):
    " single letter to three letter names"
    ret = []
    for r in seq:
        r=r.strip()
        if r.upper() in residueMap.keys():
            ret.append( residueMap[r] )
        else:
            ret.append(r)
            pass
        pass
    return ret

def cisPeptide(startResid,
                 segName='    '):
    """
    given a startResid, set up topology to make a cis peptide bond between
    residues numbers startResid and startResid+1.

    Call this routine after seqToPSF.

    This function correctly treats bonds involving proline and non-proline
    residues.

    The optional segName argument argument should be specified if there is
    more than one segment in the structure.
    """
    patch="CISP"
    from atomSel import AtomSel
    if len( AtomSel('resname PRO and resid %d and segid "%s"' % (startResid+1,
                                                                 segName)) ):
        patch="CIPP"
        pass
    from xplorSimulation import getXplorSimulation
    xplor=getXplorSimulation()
    xplor.command('''
    patch %s
    reference=-=( resid %d and segid "%s")
    reference=+=( resid %d and segid "%s") 
    end
    '''% (patch,startResid,segName,startResid+1,segName))

def dAmino(resid,
           segName='    '):
    """
    change given residue to a D-amino acid.

    Call this routine after seqToPSF.

    The optional segName argument argument should be specified if there is
    more than one segment in the structure.
    """
    from xplorSimulation import getXplorSimulation
    xplor=getXplorSimulation()
    xplor.command('''
    patch LtoD 
      reference=nil=( resid %d and segid "%s")
    end''' % (resid,segName))
    return
         

def seqToPSF(seq,
             seqType='auto',
             startResid=1,
             deprotonateHIS=1,
             segName='    ',
             disulfide_bonds=[],
             disulfide_bridges=[],
             amidate_cterm=False,
             ntermPatch="NTER",
             ctermPatch="CTER",
             customRename=False,
             sync=True,
             ):
    r"""given a primary protein or nucleic acid sequence, generate PSF info
    and load the appropriate parameters.

    The seq argument can be a string or the name of a file containing the
    sequence.

    if seqType is auto, the type (protein, dna) is determined from the
    sequence. type 'rna' must be explicitly specified.

    If deprotonateHIS is set, the HD1 atom is deleted from Histidines. This is
    the default.

    If segName is shorter than four characters, leading characters are
    space-padded. It is an error for it to be longer than 4 characeters.

    didulfide bonds are specified by a list of resid pairs in either
    disulfide_bonds or disulfide_bridges. Use disulfide_bonds for actual bonds
    and disulfide_bridges to remove the cysteine HG proton- for representing
    disulfide bonds by NOE restraints.

    Customization of terminal protein residues is handled by the
    amidate_cterm, ntermPatch, and ctermPatch arguments. If amidate_cterm
    is set to true, the c-terminus is amidated, else ctermPatch is used for
    the terminal residue (by default, this adds a second oxygen to the
    c-terminal carbon). A custom patch can also be specified for the
    n-terminus using the ntermPatch arguement (by default the terminal
    nitrogen atom will have three protons, or two if it's a proline).
    Patch names must be specified in toppar/protein.top. Blank values of
    ntermPatch or ctermPatch may be specified to suppress patching termini
    altogether. 

    If customRename is set, certain convenient atom renamings are made for
    nucleic acids:
            ADE H61 --> HN'   
            ADE H62 --> HN''  
            GUA H21 --> HN'   
            GUA H22 --> HN''  
            CYT H41 --> HN'   
            CYT H42 --> HN''  
            THY C5A --> CM

    The sync argument specifies whether XPLOR arrays are copied back to
    the C++ interface. This should almost always be the default value, True
    """
    from xplorSimulation import getXplorSimulation
    xplor=getXplorSimulation()
    from string import join
    from simulationWorld import SimulationWorld_world as simWorld

    outputState=xplor.disableOutput()

    if len(segName)<4:
        segName="%-4s"%segName
    elif len(segName)>4:
        raise Exception("segName (%s) is too long" % segName)

    import os
    if type(seq)==type("string") and os.path.exists(seq):
        seq = file(seq).read()
        pass
            
    if type(seq)==type("string"):
        seq = seq.split()
        pass

    if seqType=='auto': seqType = deduceSeqType(seq)

    if seqType=='prot': seqType = 'protein'


    #split up seq to avoid overlong line
    seqs=[[]]
    ind=0
    cnt=0
    for r in seq:
        if cnt>20:
            ind+=1
            seqs.append([])
            cnt=0
            pass
        seqs[ind].append( r )
        cnt+=1
        pass
    splitSeq = join(map(lambda x:join(x),seqs),'\n')
    #seq = join(seq)

    import protocol

    if seqType=='protein':
        protocol.initTopology("protein")#,reset=1)
        protocol.initParams("protein")

        linkStatement=''
        if "aria" in protocol.topparVersion['protein']:
            linkStatement+="LINK PPGP HEAD - GLY TAIL + PRO END\n"
            linkStatement+="LINK PPGG HEAD - GLY TAIL + GLY END\n"
            linkStatement+="LINK PPG1 HEAD - *   TAIL + GLY END\n"
            linkStatement+="LINK PPG2 HEAD - GLY TAIL + *   END\n"
            pass
        linkStatement+="LINK PEPP  HEAD - *   TAIL + PRO   END\n"

        if amidate_cterm:
            ctermPatch = "CTN "
            pass
        if ctermPatch:
            cterm = "LAST  %s   HEAD - *                    END" % ctermPatch
        else:
            cterm=''
            pass
            
        if seq[0]=='ACE':
            ntermPatch=''

        if ntermPatch=="NTER":
            nterm = r"""
            FIRSt PROP                TAIL + PRO     END { nter for PRO }
            FIRSt NTER                TAIL + *       END
            """
        elif ntermPatch:
            nterm = r"""
            FIRSt %s                TAIL + *       END
            """ % ntermPatch
        else:
            nterm=''
            

        xplor.fastCommand('''
        REMARKS  autogenerated by psfGen.py
        segment
        name="%s"
        SETUP=TRUE
        number=%d
        ''' % (segName, startResid) + '''
        chain
        %s''' % linkStatement
        + r'''
        LINK PEPT    HEAD - *     TAIL + *       END

        %s
        
        
        %s

        
        sequence %s
        end
        end
        end
        ''' % (nterm,cterm,splitSeq))

        if deprotonateHIS:
            xplor.fastCommand("delete select (name hd1 and resname his) end")
            pass

    elif seqType=='dna':
        protocol.initTopology("nucleic")#,reset=1)
        protocol.initParams("nucleic")

        xplor.fastCommand('''
        segment
        name="%s"
        SETUP=TRUE
        number=%d
        ''' % (segName, startResid) + r'''
        chain
        REMARKS  TOPH11.NUC  MACRO for RNA/DNA sequence
        REMARKS  autogenerated by psfGen.py

        ! this is a macro to define standard DNA/RNA polynucleotide bonds
        ! and termini to generate a sequence.
        ! it should be added as @TOPTRNA8.NUC in the SEGMent SEQUence
        ! level.
        ! 
        ! Axel Brunger, 17-AUG-84


        LINK NUC  HEAD - *  TAIL + *  END
        
        FIRST  5ter  TAIL + * END

        LAST 3TER  HEAD - * END
        
        ''' + '''
        sequence %s
        end
        end
        end''' % splitSeq)


        for i in range(startResid,startResid+len(seq)):
            xplor.fastCommand('''patch DEOX
                                 reference=NIL=( segid "%4s" and resid %d )
                             end''' % (segName,i))
            pass

        if customRename:
            # rename some atoms
            xplor.fastCommand(r'''
            vector do (name "HN'")  ( NAME H61 AND RESNAME ADE )
            vector do (name "HN''")  ( name H62 and resname ADE )
            vector do (name "HN'")  ( name H21 and resname GUA )
            vector do (name "HN''")  ( name H22 and resname GUA )
            vector do (name "HN'")  ( name H41 and resname CYT )
            vector do (name "HN''")  ( name H42 and resname CYT )
            vector do (name "CM")   ( name C5A and resname THY )
            ''')
            pass
        
    elif seqType=='rna':
        protocol.initTopology("nucleic")#,reset=1)
        protocol.initParams("nucleic")

        xplor.fastCommand('''
        segment
        name="%s"
        SETUP=TRUE
        number=%d
        ''' % (segName, startResid) + r'''
        chain
        REMARKS  TOPH11.NUC  MACRO for RNA/DNA sequence
        REMARKS  autogenerated by psfGen.py

        ! this is a macro to define standard DNA/RNA polynucleotide bonds
        ! and termini to generate a sequence.
        ! it should be added as @TOPTRNA8.NUC in the SEGMent SEQUence
        ! level.
        ! 
        ! Axel Brunger, 17-AUG-84


        LINK NUC  HEAD - *  TAIL + *  END
        
        FIRST  5ter  TAIL + * END

        LAST 3TER  HEAD - * END
        
        ''' + '''
        sequence %s
        end
        end
        end''' % splitSeq)


        if customRename:
            # rename some atoms
            xplor.fastCommand(r'''
            vector do (name "HN'")  ( NAME H61 AND RESNAME ADE )
            vector do (name "HN''")  ( name H62 and resname ADE )
            vector do (name "HN'")  ( name H21 and resname GUA )
            vector do (name "HN''")  ( name H22 and resname GUA )
            vector do (name "HN'")  ( name H41 and resname CYT )
            vector do (name "HN''")  ( name H42 and resname CYT )
            vector do (name "CM")   ( name C5A and resname THY )
            ''')
            pass
        
    elif seqType=='water' or seqType=='metal':
        protocol.initTopology(seqType)
        protocol.initParams(seqType)

        xplor.fastCommand('''
        segment
        name="%s"
        SETUP=TRUE
        number=%d
        ''' % (segName, startResid) + r'''
        chain
        sequence %s
        end
        end
        end''' % splitSeq)
        pass
    else:
        from simulationWorld  import SimulationWorld_world as simWorld
        if simWorld().logLevel()!='none':
            print "seqToPSF: Warning: ",\
                  "unsupported sequence type: %s:" % seqType, splitSeq
        pass

    for (res1,res2) in disulfide_bonds:
        xplor.fastCommand('''
        patch DISU 
          reference=1=( segid "%4s" and resid %d )  
          reference=2=( segid "%4s" and resid %d )  
        end''' % (segName,res1,segName,res2))
	pass
    
    for (res1,res2) in disulfide_bridges:
	xplor.fastCommand('''
        patch DISN
          reference=1=( segid "%4s" and resid %d )  
          reference=2=( segid "%4s" and resid %d )  
        end''' % (segName,res1,segName,res2))

    xplor.enableOutput(outputState)

    if sync:
        xplor.syncFrom()
        import simulation
        simulation.syncAllSimulations()
        pass
    
    return

def renameAtoms(sel='all'):
    from atomSel import AtomSel
    if type(sel)==type('string'): sel = AtomSel(sel)

    for atom in sel:
        name=atom.atomName()

        if name.find("*")>=0: name = name.replace("*","'")

        atom.setAtomName(name)
        pass
    return


def pdbToPSF(pdbRecord,psfFilename='',
             customRename=False,
             processSSBonds=True,
             processBiomt=False):
    """given a PDB record, generate XPLOR PSF structure/topology info.

    pdbRecord can be a filename or a string containing PDB contents.

    processSSBonds specifies whether the SSBOND pdb record is used in PDB
    generation.

    See seqToPSF for documentation on customRename and processBiomt.
    """
    from xplorSimulation import getXplorSimulation
    xplor=getXplorSimulation()

    try:
        import os
        os.stat(pdbRecord)
        pdbRecord = open(pdbRecord).read()
    except:
        pass

    outputState=xplor.disableOutput()
    pdbToSeqRet= pdbToSeq(pdbRecord,processBiomt=processBiomt)
    sequences=pdbToSeqRet.seqs

    for (beginResid, segid, seq) in sequences:
        seq = renameResidues(seq)
        if segid=='*': segid=''
        #print type, beginResid, segid, seq
        seqToPSF(seq,seqType='auto',startResid=beginResid,segName=segid,
                 deprotonateHIS=False,customRename=customRename,
                 sync=False)
        pass
    
    biomt = pdbToSeqRet.biomt
    if biomt:
        for label in biomt.labels():
            for chain in biomt.chains:
                duplicateSegment(chain,chain+label)
                pass
            pass
        pass
    pass

    xplor.syncFrom()
    import simulation
    simulation.syncAllSimulations()

    if processSSBonds:
        autoProcessPdbSSBonds(pdbRecord)
        pass
    
    if psfFilename:
        xplor.command("write psf output=%s end" % psfFilename)

    #renameAtoms()
    xplor.enableOutput(outputState)
    return pdbToSeqRet.biomt

def addDisulfideBond(sel1,sel2):
    """ add a disulfide bond between residues in the two atom selections

    Should be called after PSF information is generated.

    If atoms required for the given disulfide bond are not present, a
    UserWarning exception is thrown.
    """

    from atomSel import AtomSel
    if isinstance(sel1,str): sel1 = AtomSel(sel1)
    if isinstance(sel2,str): sel2 = AtomSel(sel2)

    if sel1.simulation().name() != sel2.simulation().name():
        raise Exception("no disulfide bond between different Simulations!")

    from xplorSimulation import getXplorSimulation    
    xSim = getXplorSimulation(sel1.simulation())

    if (len(sel1)==0 or
        len(AtomSel('segid "%s" and resid %d and name SG'%
                    (sel1[0].segmentName(),sel1[0].residueNum())))==0):
        raise UserWarning("No sulfur atom in "+sel1.string())
    if (len(sel2)==0 or
        len(AtomSel('segid "%s" and resid %d and name SG'%
                    (sel2[0].segmentName(),sel2[0].residueNum())))==0):
        raise UserWarning("No sulfur atom in "+sel2.string())
    
    xSim.command("""patch
                       DISU reference=1=( segid "%s" and resid %d )
                            reference=2=( segid "%s" and resid %d )
                     end""" % (sel1[0].segmentName(),sel1[0].residueNum(),
                               sel2[0].segmentName(),sel2[0].residueNum()))
    return

def duplicateSegment(segid,
                     newSegid):
    """
    generate psf info for a new segment which is identical to an existing
    segment, with only different segid.
    """
    from xplorSimulation import getXplorSimulation
    xsim=getXplorSimulation()
    xsim.command('duplicate selection=(segid "%s") segid=%s end' %
                 (segid,newSegid))
    return        
    

class Biomt:
    """
    class containing info from a BIOMT record for generating new chains related
    by rotation+ translation to specified ATOM entries.

    Members are chains, rot, trans.
    """

    class BiomtEntry:
        def __init__(s,rot,trans):
            s.rot=rot
            s.trans=trans
            return
        pass
    def __init__(s,chains):
        s.chains=chains
        s.entries={}
        return
    def addEntry(s,key,rot,trans):
        s.entries[key] = s.BiomtEntry(rot,trans)
        return
    def labels(s):
        ret=s.entries.keys()
        ret.sort()
        return ret
    def entry(s,label):
        return s.entries[label]
        
    pass

def processBiomtEntries(remark350,
                        biomol=1):
    """
    Read info for symmetric subunits generated by BIOMT entries.

    This will read the entry for the biomolecule labelled biomol.

    Non-trivial (rotation/translation) transformations are read and stored
    along with the specified chain ids, and returned in a Biomt object
    """
    entries=[]
    for line in remark350:
        entries.append(line[10:].strip())
        pass
    ret = None
    curMolecule=False
    thisEntry=[]
    for entry in entries:
        if entry.startswith("BIOMOLECULE:"):
            cbiomol = int(entry.split(':')[1])
            print cbiomol
            if biomol==cbiomol:
                curMolecule=True
            else:
                curMolecule=False
                pass
            pass
        if curMolecule:
            thisEntry.append(entry)
            pass
        pass
    if not thisEntry:
        raise Exception("BIOMT record for Biomolecule %d not found" % biomol)

    for entry in thisEntry:
        if entry.startswith("APPLY THE FOLLOWING TO CHAINS:"):
            chains = entry.split(':')[1]
        elif entry.startswith("AND CHAINS:"):
            chains += entry.split(':')[1]
            pass
        pass
    chains = map(lambda c:c.strip(), chains.split(','))
    ret = Biomt(chains)
    lines={}
    for entry in thisEntry:
        if entry.startswith('BIOMT'):
            (xyz,label,r1,r2,r3,t) = entry[5:].split()
            if not label in lines.keys():
                lines[label]={}
                pass
            lines[label][xyz] = map(lambda s:float(s), [r1,r2,r3,t])
            pass
        pass
    from mat3 import Mat3
    from vec3 import Vec3
    biomts={}
    for l in lines.keys():
        rot= Mat3(lines[l]['1'][0],lines[l]['1'][1],lines[l]['1'][2],
                  lines[l]['2'][0],lines[l]['2'][1],lines[l]['2'][2],
                  lines[l]['3'][0],lines[l]['3'][1],lines[l]['3'][2])
        trans= Vec3(lines[l]['1'][3],lines[l]['2'][3],lines[l]['3'][3])
        
        #remove trivial entries
        test=Mat3(1,0,0,
                  0,1,0,
                  0,0,1)-rot
        trivial=True
        for i in range(3):
            if abs(trans[i])>1e-3:
                trivial=False
            for j in range(3):
                if abs(test[i,j])> 1e-3:
                    trivial=False
                    pass
                pass
            pass
        if not trivial:
            ret.addEntry(l,rot,trans)
            pass

    print "BIOMT record processed for Biomolecule %d" % biomol

    return ret